1 Set Up

1.1 R Code

#packages we need for this code file
library(ggplot2)
library(mgcv)
library(lubridate)
library(zoo)
library(tidyverse)
library(dplyr)
library(DHARMa)
library(mgcViz)
library(extrafont)
library(arm)
loadfonts()
library(stargazer)
library(ellipse)
#define functions we will need for analysis
#expit (inverse-logit): maps a real number to a probability in (0, 1)
expit <- function(x) {
  exp(x) / (1 + exp(x))
}

#logit: maps a probability in (0, 1) to the real line (inverse of expit)
logit <- function(x) {
  log(x / (1 - x))
}

1.2 Data

#read in data
#NOTE: paths are relative to the project root; the CSV holds the
#state x six-month-period panel used throughout the analysis
main_analysis_data<-read.csv("./Data/full_data_set_11_29_21_unintentional.csv")

################################## set up data set ################################
#add the intervention dates and time period data
#parse the two date columns from character to Date (default ISO "%Y-%m-%d" format)
main_analysis_data$Intervention_First_Date<-as.Date(main_analysis_data$Intervention_First_Date)
main_analysis_data$Time_Period_Start<-as.Date(main_analysis_data$Time_Period_Start)
#rename the outcome column "sum_deaths" to "imputed_deaths"
#(presumably deaths were imputed for suppressed cells upstream -- TODO confirm against the data-build script)
names(main_analysis_data)[which(colnames(main_analysis_data) == "sum_deaths")] <- "imputed_deaths"

################################## set up the Regions ##############################
#set up the regions according to Census: https://www.census.gov/geographies/reference-maps/2010/geo/2010-census-regions-and-divisions-of-the-united-states.html
NE.name <- c("Connecticut","Maine","Massachusetts","New Hampshire",
             "Rhode Island","Vermont","New Jersey","New York",
             "Pennsylvania")

MW.name <- c("Indiana","Illinois","Michigan","Ohio","Wisconsin",
             "Iowa","Kansas","Minnesota","Missouri","Nebraska",
             "North Dakota","South Dakota")

S.name <- c("Delaware","District of Columbia","Florida","Georgia",
            "Maryland","North Carolina","South Carolina","Virginia",
            "West Virginia","Alabama","Kentucky","Mississippi",
            "Tennessee","Arkansas","Louisiana","Oklahoma","Texas")

W.name <- c("Arizona","Colorado","Idaho","New Mexico","Montana",
            "Utah","Nevada","Wyoming","Alaska","California",
            "Hawaii","Oregon","Washington")

region.list <- list(
  Northeast=NE.name,
  Midwest=MW.name,
  South=S.name,
  West=W.name)

#initialize vector with "West" and then impute the other regions for the states
main_analysis_data$Region<-rep("West", nrow(main_analysis_data))
for(state in unique(main_analysis_data$State)){
  if(state %in% region.list$Northeast){
    main_analysis_data$Region[main_analysis_data$State == state]<-"Northeast"
  }else if(state %in% region.list$Midwest){
    main_analysis_data$Region[main_analysis_data$State == state]<-"Midwest"
  }else if(state %in% region.list$South){
    main_analysis_data$Region[main_analysis_data$State == state]<-"South"
  }
}

2 Exploratory Data Analysis

2.1 Overdose Deaths

############################## EDA: Plot the Outcome and Intervention Trends ###############################
#plot the time series of the number of deaths and probability of overdose death
#read the tab-delimited yearly OD-death export (looks like a CDC WONDER download -- TODO confirm source)
od_data_recent <- read.csv("./Data/od_unintentional_yearly_18_and_up_11_28_21.txt", 
                           sep = "\t", stringsAsFactors = FALSE)
#Deaths arrives as character; coercion turns non-numeric entries (e.g. suppressed cells) into NA
od_data_recent$Deaths <- as.numeric(od_data_recent$Deaths)
od_data_recent<-od_data_recent[!is.na(od_data_recent$Year),] #delete the rows that just contain data set description info
#restrict to 2000-2019 and total deaths across all strata within each year
od_data_recent<- od_data_recent %>% 
  filter(Year > 1999 & Year < 2020) %>% 
  group_by(Year) %>%
  summarise(sum_deaths = sum(Deaths, na.rm = TRUE))

#line-and-point plot of the yearly national totals; uncomment pdf()/dev.off() to write the figure to disk
# pdf("./Figures/total_od_deaths_all_paper_11_29_21_2000_2019.pdf")
ggplot(data = od_data_recent, mapping = aes(x = Year, y = sum_deaths)) +
  geom_line() + 
  geom_point() +
  labs(x = "Year", y = "Yearly Number of Unintentional Drug Overdose Deaths in the 50 U.S. States") +
  theme(panel.background = element_rect("white"), 
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"),
        axis.title=element_text(family="Times", size=10, face="bold"),
        axis.text=element_text(family="Times",size=10)) +
  scale_x_continuous(breaks = seq(2000, 2020, by = 2)) +
  ylim(c(0, 62000))

# dev.off()

#yearly national totals from the analysis panel
#NOTE: population is divided by 2 because each state appears twice per year
#(two six-month rows), so summing it double-counts -- TODO confirm panel structure
main_analysis_data_sum <- main_analysis_data %>% 
  group_by(year = year(Time_Period_Start)) %>%
  summarise(total_deaths = sum(imputed_deaths),
            sum_pop = sum(population)/2,
            total_prop = sum(imputed_deaths)/(sum(population)/2),
            total_prop_by_100000 = 100000*sum(imputed_deaths)/(sum(population)/2))
# %>%mutate(date = as.Date(as.yearmon(year)))

#compute the percentage difference between 2000 and 2019
death_2000 <- main_analysis_data_sum$total_deaths[main_analysis_data_sum$year == 2000]
death_2019 <- main_analysis_data_sum$total_deaths[main_analysis_data_sum$year == 2019]

#percent increase in total deaths from 2000 to 2019 (printed below)
((death_2019 - death_2000)/death_2000)*100
## [1] 435.5701

2.2 Intervention: DIH Prosecutions

#plot the number of states with an intervention for each time point
#first, create a data set to find the number of states with an intervention at each time point
#initialize the data set with the start date of the time period
#one row per distinct (non-missing) first-intervention date observed in the panel
num_states_with_intervention<-data.frame("Start_Date" =
                                  unique((main_analysis_data$Intervention_First_Date[!is.na(main_analysis_data$Intervention_First_Date)])))
numStates<-c()

#for each time period i, we first find the states where the first intervention date occurred before i
#then, we append it to numStates
#NOTE: iterating a Date vector with for() strips the Date class, so i is numeric
#here; the <= comparison still works because Dates compare on their numeric value
for(i in unique((num_states_with_intervention$Start_Date))){
  states_w_int<-unique(main_analysis_data$State[(main_analysis_data$Intervention_First_Date)<=i])
  numStates<-append(numStates, length(states_w_int[!is.na(states_w_int)]))
}
num_states_with_intervention$numStates<-numStates
num_states_with_intervention$Start_Date <- as.Date(num_states_with_intervention$Start_Date)
#anchor the series at the study window's start (0 states on 2000-01-01) and end
#(max count on 2019-12-31) so the plotted line spans the full period
num_states_with_intervention <- rbind(data.frame("Start_Date" = c(as.Date("2000-01-01"),
                                                                  as.Date("2019-12-31")),
                                                 "numStates" = c(0, max(num_states_with_intervention$numStates))),
                                      num_states_with_intervention)
#lag the count so each date also carries the previous count (gives a step shape when reshaped)
num_states_with_intervention <- num_states_with_intervention %>% 
  arrange(Start_Date) %>%
  mutate(lag_numStates = lag(numStates))

#long format: two rows per date (lagged and current count) for the line plot below
num_states_with_intervention <- num_states_with_intervention %>%
  pivot_longer( c("lag_numStates", "numStates"), "numStates")

#step-style plot of the cumulative number of states with DIH prosecutions;
#uncomment pdf()/dev.off() to write the figure to disk
# pdf("Figures/num_states_with_intervention_11_29_21.pdf")
ggplot(num_states_with_intervention, aes(x = Start_Date, y = value, group = 1)) +
  geom_line() +
  # geom_point(num_states_with_intervention[num_states_with_intervention$numStates == "numStates",],
  #            mapping = aes(x = Start_Date, y = value, group = 1), size = 1) +
  labs(x = "Year", y = "Cumulative Number of States to have DIH Prosecutions") +
  theme(axis.text=element_text(family="Times",size=10),
        axis.title=element_text(family="Times", size=10, face="bold"),
        panel.border = element_blank(), 
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.line = element_line(colour = "black"),
        axis.text.x = element_text(family="Times", size=10),
        panel.background = element_rect("white")) +
  scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years"))

# dev.off()

2.3 Policy Dates

#add the intervention variable as a measure of number of states with DIH prosecution
#num_states_w_intervention: within each six-month period, the sum of the
#Intervention_Redefined indicator across all state rows
#NOTE: the data frame stays grouped by Time_Period_Start after this mutate;
#later code calls ungroup() where needed
main_analysis_data <- main_analysis_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention = sum(Intervention_Redefined))

############################# Look at the policy dates #######################
#one row per state with the year-month each policy first took effect
#(unique() collapses the repeated per-period values to a single date per state)
policy_dates <- main_analysis_data %>% 
  group_by(State) %>%
  summarise(unique(format(Intervention_First_Date, "%Y-%m")),
            unique(format(as.Date(Naloxone_Pharmacy_Yes_First_Date), "%Y-%m")),
            unique(format(as.Date(Naloxone_Pharmacy_No_First_Date), "%Y-%m")),
            unique(format(as.Date(Medical_Marijuana_First_Date), "%Y-%m")),
            unique(format(as.Date(Recreational_Marijuana_First_Date), "%Y-%m")),
            unique(format(as.Date(PDMP_First_Date), "%Y-%m")),
            unique(format(as.Date(GSL_First_Date), "%Y-%m")),
            unique(format(as.Date(Medicaid_Expansion_First_Date), "%Y-%m")))
#human-readable column names for the exported table
names(policy_dates) <- c("State", "DIH Prosecutions", "NAL: Pharmacists Yes",
                         "NAL: Pharmacists No", "MML", "RML", "PDMP", "GSL",
                         "Medicaid")
# write.csv(policy_dates, "./Data/policy_dates_11_29_21.csv")

2.4 Create Plot of Number of DIH Prosecutions Per State

#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")

#data cleaning:
#- parse the charge date from "%m/%d/%Y"
#- normalize two misspelled state entries ("pennsylvania", "Virginia ")
#- use 9999 as a temporary sentinel for unparseable/missing ages, so the
#  deceased_age >= 18 filter keeps unknown-age rows (then restore NA afterwards)
prosecution_data<-prosecution_data %>% 
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  mutate(deceased_age = ifelse(!is.na(as.numeric(Deceased.s.Age)), as.numeric(Deceased.s.Age), 9999)) %>%
  filter(!is.na(Date), 
         State.Filed != "No Info", 
         State.Filed != "No info", 
         State.Filed != "No Info ",
         deceased_age >= 18,
         State != "" ) %>%
  mutate(deceased_age = ifelse(deceased_age == 9999, NA, deceased_age))

#clean up the data by looking at the link to the article
#manual correction of one mistyped charge date (2026-08-01 -> 2016-02-15)
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#change the states into Character instead of factor
prosecution_data$State<-as.character(prosecution_data$State)
#see how many prosecution data points there are for each state
table(prosecution_data$State)
## 
##        Alabama         Alaska        Arizona       Arkansas     California 
##             11              7              7              4             65 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##             29             43              3            133             27 
##          Idaho       Illinois        Indiana           Iowa         Kansas 
##              8            330             55             31              6 
##       Kentucky      Louisiana          Maine       Maryland  Massachusetts 
##             41             63             17             58             33 
##       Michigan      Minnesota    Mississippi       Missouri        Montana 
##            104            135              1             40             10 
##       Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
##              1             11             38            134              4 
##       New York North Carolina   North Dakota           Ohio       Oklahoma 
##             96            121             51            379             39 
##         Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
##             16            708              2             11             12 
##      Tennessee          Texas           Utah        Vermont       Virginia 
##             94             43             17             12             59 
##     Washington  West Virginia      Wisconsin        Wyoming 
##             63             33            364             19
#there are some repeated cases depending on victim so extract distinct cases
#(the same accused person charged on the same date can appear once per victim;
#keep one row each, retaining all columns via .keep_all)
prosecution_data_unique <- prosecution_data %>%
  group_by(State) %>%
  distinct(Accused.Name, Date, .keep_all = T)
#re-tabulate per-state counts after de-duplication (compare with the table above)
table(prosecution_data_unique$State)
## 
##        Alabama         Alaska        Arizona       Arkansas     California 
##             11              7              7              4             61 
##       Colorado    Connecticut       Delaware        Florida        Georgia 
##             27             42              3            129             24 
##          Idaho       Illinois        Indiana           Iowa         Kansas 
##              8            324             53             31              6 
##       Kentucky      Louisiana          Maine       Maryland  Massachusetts 
##             41             63             17             57             33 
##       Michigan      Minnesota    Mississippi       Missouri        Montana 
##            102            135              1             39              9 
##       Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
##              1             11             38            129              4 
##       New York North Carolina   North Dakota           Ohio       Oklahoma 
##             91            118             38            372             32 
##         Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
##             16            700              2             11             12 
##      Tennessee          Texas           Utah        Vermont       Virginia 
##             94             42             17             12             59 
##     Washington  West Virginia      Wisconsin        Wyoming 
##             60             33            357             19
#change date charged into Date object
#NOTE(review): this re-parses the raw Date.charged column with lubridate::mdy(),
#overwriting the Date column built earlier -- which silently discards the manual
#2026-08-01 -> 2016-02-15 correction applied above; verify this is intended
prosecution_data_unique$Date<-mdy(prosecution_data_unique$Date.charged)

#group the data into six month periods (floors each date to Jan 1 / Jul 1)
prosecution_data_unique<-prosecution_data_unique %>% 
  mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))

prosecution_grouped <- prosecution_data_unique %>% 
  #filter to dates after 2000 and dates before 2020
  filter(year(six_month_pd) >= 2000 & year(six_month_pd) <= 2019) %>%
  group_by(State, six_month_pd) %>% 
  #for each state, for each six month period, count the number of DIH prosecutions
  summarise(num_dih = n()) %>% 
  #ONLY IF GROUPS
  #label the groups according to zero, low, or high
  #NOTE: num_dih comes from n() and is therefore always >= 1 here, so "zero" can
  #only arise from the manually added Hawaii row below
  mutate(group = ifelse(num_dih == 0, "zero", ifelse(num_dih >= 5, "high", "low"))) %>%
  ungroup() %>%
  #have to add in a row for hawaii because its not in the prosecution dataset
  add_row(State = "Hawaii", six_month_pd = as.Date("2000-01-01"), num_dih = 0, group = "zero")

#look at table of ages of the victims (counts per exact age; NAs dropped by table())
table(as.numeric(prosecution_data_unique$Deceased.s.Age))
## 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37 
##  88 107  88 155 132 150 142 130 152 122 152 110 148  92  96 100  84  97  46  56 
##  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
##  54  53  61  47  27  37  21  29  23  38  24  29  27  18  14  15  17  15  11  10 
##  58  59  60  61  62  63  64  66  67  68  69  75  85  87 
##  10  22   4   4   7   2   2   2   1   1   3   1   1   1
#same age distribution expressed as a proportion of the non-missing ages
table(as.numeric(prosecution_data_unique$Deceased.s.Age))/sum(!is.na(as.numeric(prosecution_data_unique$Deceased.s.Age)))
## 
##           18           19           20           21           22           23 
## 0.0305767894 0.0371785962 0.0305767894 0.0538568450 0.0458651842 0.0521195274 
##           24           25           26           27           28           29 
## 0.0493398193 0.0451702571 0.0528144545 0.0423905490 0.0528144545 0.0382209868 
##           30           31           32           33           34           35 
## 0.0514246004 0.0319666435 0.0333564976 0.0347463516 0.0291869354 0.0337039611 
##           36           37           38           39           40           41 
## 0.0159833218 0.0194579569 0.0187630299 0.0184155664 0.0211952745 0.0163307853 
##           42           43           44           45           46           47 
## 0.0093815149 0.0128561501 0.0072967338 0.0100764420 0.0079916609 0.0132036136 
##           48           49           50           51           52           53 
## 0.0083391244 0.0100764420 0.0093815149 0.0062543433 0.0048644892 0.0052119527 
##           54           55           56           57           58           59 
## 0.0059068798 0.0052119527 0.0038220987 0.0034746352 0.0034746352 0.0076441974 
##           60           61           62           63           64           66 
## 0.0013898541 0.0013898541 0.0024322446 0.0006949270 0.0006949270 0.0006949270 
##           67           68           69           75           85           87 
## 0.0003474635 0.0003474635 0.0010423905 0.0003474635 0.0003474635 0.0003474635
#view table of people in each age category, based on prosecution data
#bucket victim ages into coarse ranges; rows with non-numeric/missing age get NA
#(nested ifelse propagates NA), and the final else captures 65 and over
prosecution_data_unique <- prosecution_data_unique %>%
  mutate(Deceased.s.Age = as.numeric(Deceased.s.Age),
         age_groups = ifelse(Deceased.s.Age <= 17, "0_17", 
                             ifelse(Deceased.s.Age >= 18 & Deceased.s.Age <= 34, "18_34", 
                                    ifelse(Deceased.s.Age >= 35 & Deceased.s.Age <= 54, "35_54", 
                                           ifelse(Deceased.s.Age >= 55 & Deceased.s.Age <= 64, "55_64", "65_plus")))))

#NOTE: exclude = "NA" excludes the *string* "NA", not missing values; it also
#overrides table()'s default exclude = c(NA, NaN), which is why a separate <NA>
#count appears in the output below
table(prosecution_data_unique$age_groups, exclude = "NA")
## 
##   18_34   35_54   55_64 65_plus    <NA> 
##    2048     733      87      10     624
#we compute the final group for each state by seeing if it ever hits high or low
#if it hits a higher level, then it will remain defined in that higher level
#(i.e. a state is "high" if ANY six-month period was high, else "low" if any was low)
prosecution_grouped_final <- prosecution_grouped %>%  
  group_by(State) %>% 
  summarise(final_gp = ifelse(sum(group == "high") > 0, "high", ifelse(sum(group == "low")> 0, "low", "zero"))) 

#plot of the number of states in each zero/low/high category
#(..count.. is the stat_count-computed bar height; newer ggplot2 spells this after_stat(count))
ggplot(prosecution_grouped_final, aes(final_gp)) + 
  geom_bar() + 
  labs(title = "Number of States by DIH prosecution Category, with Low = [1,5]") + 
  geom_text(aes(label = ..count..), stat = "count", vjust = -.75)

#number of DIH prosecutions per six month for each state
#one small-multiple bar panel per state; uncomment pdf()/dev.off() to save
# pdf("Figures/num_dih_per_six_month_pd_by_state_11_29_21.pdf")
ggplot(prosecution_grouped, aes(x = six_month_pd, y = num_dih)) + 
  geom_bar(stat = "identity") + 
  facet_wrap(~State) + 
  labs(y = "Number of DIH Prosecutions Reported in the Media",
       x = "Date") + 
  theme(axis.text.x = element_text(hjust = 1, size = 6, family = "Times", angle = 30),
        axis.text.y = element_text(size = 6, family = "Times"),
        axis.title = element_text(size = 10, face = "bold", family = "Times"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        panel.background = element_rect("white"),
        legend.position = "bottom")

# dev.off()

# write.csv(prosecution_grouped, "./Data/num_dih_per_six_month_pd_by_state_11_12_21.csv")

3 Main Analysis: Effect of At Least One DIH Prosecution Report in Media on Unintentional Overdose Deaths

3.1 Analysis

############################## Run Model with Spline Time Effects by Region ###############################
#model that we will be using for the main analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#run the analysis for all the states
#binomial GAM on (deaths, survivors) success/failure counts, with:
#- State fixed effects
#- region-specific cubic-regression-spline trends in Time_Period_ID
#- indicators for co-occurring policies, the DIH intervention, and the
#  national count of states with the intervention
main_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                           s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                           Naloxone_Pharmacy_Yes_Redefined +
                           Naloxone_Pharmacy_No_Redefined +
                           Medical_Marijuana_Redefined +
                           Recreational_Marijuana_Redefined +
                           GSL_Redefined +
                           PDMP_Redefined +
                           Medicaid_Expansion_Redefined +
                           Intervention_Redefined + 
                           num_states_w_intervention,
                         data = main_analysis_data, family = "binomial")

#summary output of the model (HTML coefficient table rendered below)
stargazer(main_analysis_model, type = "html", dep.var.labels = c("Unintentional Overdose Deaths"))
Dependent variable:
Unintentional Overdose Deaths
StateAlaska 0.277***
(0.028)
StateArizona 0.315***
(0.014)
StateArkansas -0.390***
(0.020)
StateCalifornia -0.159***
(0.013)
StateColorado 0.096***
(0.016)
StateConnecticut 0.189***
(0.016)
StateDelaware 0.430***
(0.022)
StateFlorida 0.253***
(0.012)
StateGeorgia -0.071***
(0.013)
StateHawaii -0.226***
(0.026)
StateIdaho -0.136***
(0.024)
StateIllinois -0.016
(0.013)
StateIndiana 0.088***
(0.014)
StateIowa -0.740***
(0.021)
StateKansas -0.328***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.294***
(0.014)
StateMaine 0.145***
(0.022)
StateMaryland -1.067***
(0.019)
StateMassachusetts 0.208***
(0.014)
StateMichigan -0.019
(0.014)
StateMinnesota -0.618***
(0.017)
StateMississippi -0.100***
(0.018)
StateMissouri 0.194***
(0.015)
StateMontana -0.359***
(0.029)
StateNebraska -0.883***
(0.029)
StateNevada 0.439***
(0.017)
StateNew Hampshire 0.257***
(0.020)
StateNew Jersey 0.107***
(0.013)
StateNew Mexico 0.631***
(0.017)
StateNew York -0.238***
(0.013)
StateNorth Carolina 0.177***
(0.013)
StateNorth Dakota -1.053***
(0.045)
StateOhio 0.455***
(0.012)
StateOklahoma 0.385***
(0.015)
StateOregon -0.194***
(0.018)
StatePennsylvania 0.436***
(0.012)
StateRhode Island 0.238***
(0.022)
StateSouth Carolina 0.221***
(0.015)
StateSouth Dakota -0.960***
(0.043)
StateTennessee 0.436***
(0.013)
StateTexas -0.204***
(0.012)
StateUtah 0.072***
(0.018)
StateVermont -0.167***
(0.031)
StateVirginia -0.112***
(0.014)
StateWashington 0.078***
(0.015)
StateWest Virginia 0.876***
(0.015)
StateWisconsin -0.037**
(0.015)
StateWyoming 0.022
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.024***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.008
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.037***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.020***
(0.006)
Medicaid_Expansion_Redefined 0.099***
(0.006)
Intervention_Redefined 0.062***
(0.005)
num_states_w_intervention 0.004**
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.911***
(0.054)
Observations 2,000
Adjusted R2 0.911
Log Likelihood -16,751.790
UBRE 8.841
Note: p<0.1; p<0.05; p<0.01

3.2 Sandwich Estimator

#here, we estimate the variance-covariance matrix through the sandwich estimator
#we create a function so that we don't have to keep writing the code:
#cov_data is such that rows are state-time combinations and columns are the different policy measures
#coef_values need to be in order of the columns of cov_data
#z_value is the z-value that corresponds to the CI. We default to 95% CI so we default to 1.96
#returns a data frame with one row per coefficient: Wald CI bounds and point
#estimate on the log-odds scale, their exponentiated (odds-ratio) versions, and
#the sandwich standard deviation
compute_sd_and_CI <- function(cov_data, population, observed_od, predicted_prob_od, coef_values, z_value = 1.96){
  #coerce once to a numeric matrix so we can use vectorized matrix algebra
  #instead of looping over rows with 1x1 %*% products
  X <- as.matrix(cov_data)

  #predicted number of OD deaths: n_{s,t} * p_{s,t}
  pred_num_od <- population*predicted_prob_od
  #squared residuals on the count scale: (y_{s,t} - n_{s,t} p_{s,t})^2
  obs_minus_pred_num_od_sq <- (observed_od - pred_num_od)^2

  #"bread" term: sum_{s,t} n_{s,t}*p_{s,t}*(1-p_{s,t}) * x_{s,t} x_{s,t}^T
  #crossprod(X, w * X) computes t(X) %*% diag(w) %*% X without the explicit loop
  constant_term <- crossprod(X, population*predicted_prob_od*(1 - predicted_prob_od)*X)
  #"meat" term: sum_{s,t} (y_{s,t} - n_{s,t} p_{s,t})^2 * x_{s,t} x_{s,t}^T
  middle_term <- crossprod(X, obs_minus_pred_num_od_sq*X)

  #sandwich: bread^{-1} %*% meat %*% t(bread^{-1})
  #solve() inverts the matrix; invert once and reuse
  bread_inv <- solve(constant_term)
  var_cov <- bread_inv%*%middle_term%*%t(bread_inv)
  #we obtain the standard deviations by taking the square root of the diagonal
  #of the variance-covariance matrix
  sd_of_coefficients <- sqrt(diag(var_cov))

  #Wald confidence interval for each coefficient
  lb_coef <- coef_values - z_value*(sd_of_coefficients)
  ub_coef <- coef_values + z_value*(sd_of_coefficients)

  data.frame(lb_coef, coef_values, ub_coef,
             exp_lb = exp(lb_coef), exp_coef = exp(coef_values),
             exp_ub = exp(ub_coef), sd_coef = sd_of_coefficients)
}
#we only want to estimate the variances for the policies, intervention variable, and number of states with intervention
#ungroup() first because main_analysis_data is still grouped from an earlier mutate
subset_cov <- main_analysis_data %>%
  ungroup() %>%
  dplyr::select(
         Naloxone_Pharmacy_Yes_Redefined,
         Naloxone_Pharmacy_No_Redefined,
         Medical_Marijuana_Redefined,
         Recreational_Marijuana_Redefined,
         GSL_Redefined,
         PDMP_Redefined,
         Medicaid_Expansion_Redefined,
         Intervention_Redefined,
         num_states_w_intervention)

#estimate the 95% CI and SD
#NOTE: tail(..., ncol(subset_cov)) assumes the LAST 9 parametric coefficients of
#the gam are exactly these columns in this order -- true for the model formula
#above, but fragile if the formula changes
coefficient_values <- tail(summary(main_analysis_model)$p.coeff, ncol(subset_cov))
#fitted probabilities on the response scale for every state-period row
pred_prob <- predict(main_analysis_model, newdata = main_analysis_data, type = "response")
main_analysis_sd_and_ci <- compute_sd_and_CI(subset_cov, main_analysis_data$population, main_analysis_data$imputed_deaths,
                  pred_prob, coefficient_values)
main_analysis_sd_and_ci
##                                       lb_coef  coef_values      ub_coef
## Naloxone_Pharmacy_Yes_Redefined  -0.067872997 -0.024109299  0.019654398
## Naloxone_Pharmacy_No_Redefined   -0.021034248  0.008403335  0.037840919
## Medical_Marijuana_Redefined       0.041340673  0.062983026  0.084625379
## Recreational_Marijuana_Redefined -0.069389910 -0.036775616 -0.004161323
## GSL_Redefined                    -0.006809296  0.033853246  0.074515788
## PDMP_Redefined                   -0.050139563 -0.019514080  0.011111403
## Medicaid_Expansion_Redefined      0.071006665  0.099106282  0.127205899
## Intervention_Redefined            0.036522147  0.061825922  0.087129696
## num_states_w_intervention         0.002706159  0.003692466  0.004678773
##                                     exp_lb  exp_coef    exp_ub     sd_coef
## Naloxone_Pharmacy_Yes_Redefined  0.9343791 0.9761790 1.0198488 0.022328417
## Naloxone_Pharmacy_No_Redefined   0.9791854 1.0084387 1.0385660 0.015019175
## Medical_Marijuana_Redefined      1.0422071 1.0650088 1.0883093 0.011042017
## Recreational_Marijuana_Redefined 0.9329628 0.9638924 0.9958473 0.016639946
## GSL_Redefined                    0.9932138 1.0344328 1.0773624 0.020746195
## PDMP_Redefined                   0.9510967 0.9806751 1.0111734 0.015625246
## Medicaid_Expansion_Redefined     1.0735884 1.1041836 1.1356508 0.014336539
## Intervention_Redefined           1.0371973 1.0637771 1.0910382 0.012910089
## num_states_w_intervention        1.0027098 1.0036993 1.0046897 0.000503218

3.3 Plots

#check diagnostics
#gam.check prints convergence information and basis-dimension (k) diagnostics
#for each smooth, and draws residual diagnostic plots
gam.check(main_analysis_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [5.064278e-08,2.529474e-05]
## (score 8.840989 & scale 1).
## Hessian positive definite, eigenvalue range [8.477283e-05,0.0004428833].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.89    1.05    1.00
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.96    1.05    0.99
## s(Time_Period_ID):as.factor(Region)South     9.00 8.93    1.05    0.98
## s(Time_Period_ID):as.factor(Region)West      9.00 8.22    1.05    0.98
#wrap the fitted gam in an mgcViz object so smooths can be plotted with ggplot2 layers
main_analysis_model_object <- getViz(main_analysis_model)

#one panel per region-specific smooth; sm() indices 1-4 follow the order shown in
#the gam.check output above (Midwest, Northeast, South, West).
#mul = 5 widens the CI ribbon to +/- 5 standard errors; x-axis breaks 1/11/21/31
#map six-month Time_Period_IDs to the years 2000/2005/2010/2015
midwest_plot <- plot(sm(main_analysis_model_object, 1)) +
  l_fitLine() +
  l_ciLine(mul = 5, linetype = 2)  + 
  theme_classic() +
  labs(x = "Time Period", y = "Smoothed Time Effects for Midwest") +
  scale_x_continuous(breaks=c(1,11,21,31), 
                     labels=c("2000", "2005", "2010", "2015"))  +
  theme(text=element_text(family="Times",size=10),
        title = element_text(family="Times", size=10, face = "bold"),
        panel.background = element_rect("white")) +
  ylim(c(-1,1.2))

northeast_plot <- plot(sm(main_analysis_model_object,2)) +
  l_fitLine() +
  l_ciLine(mul = 5, linetype = 2)  + 
  theme_classic() +
  labs(x = "Time Period", y = "Smoothed Time Effects for Northeast") +
  scale_x_continuous(breaks=c(1,11,21,31), 
                     labels=c("2000", "2005", "2010", "2015"))+
  theme(text=element_text(family="Times",size=10),
        title = element_text(family="Times", size=10, face = "bold"),
        panel.background = element_rect("white")) +
  ylim(c(-1,1.2))

south_plot <- plot(sm(main_analysis_model_object, 3)) +
  l_fitLine() +
  l_ciLine(mul = 5, linetype = 2)  + 
  theme_classic() +
  labs(x = "Time Period", y = "Smoothed Time Effects for South") +
  scale_x_continuous(breaks=c(1,11,21,31), 
                     labels=c("2000", "2005","2010", "2015"))+
  theme(text=element_text(family="Times",size=10),
        title = element_text(family="Times", size=10, face = "bold"),
        panel.background = element_rect("white")) +
  ylim(c(-1,1.2))

west_plot <- plot(sm(main_analysis_model_object, 4)) +
  l_fitLine() +
  l_ciLine(mul = 5, linetype = 2)  + theme_classic() +
  labs(x = "Time Period", y = "Smoothed Time Effects for West") +
  scale_x_continuous(breaks=c(1,11,21,31), 
                     labels=c("2000", "2005", "2010", "2015"))+
  theme(text=element_text(family="Times",size=10),
        title = element_text(family="Times", size=10, face = "bold"),
        panel.background = element_rect("white")) +
  ylim(c(-1,1.2))

#arrange the four regional smooth panels in a 2x2 grid; uncomment pdf()/dev.off() to save
# pdf("./Figures/time_smoothed_effects_9_6_21.pdf")
gridPrint(midwest_plot, northeast_plot, south_plot, west_plot, ncol = 2)

# dev.off()

#yearly national population: take unique(population) per state-year before summing
#(assumes population is constant across the two six-month rows within a state-year
#-- TODO confirm against the data-build script)
total_pop <- main_analysis_data %>% 
  group_by(year = year(Time_Period_Start), State) %>% 
  summarise(pop = unique(population)) %>%
  group_by(year) %>% 
  summarise(sum(pop))

#yearly national death rate per 100,000: deaths scaled by 100,000, then divided
#by the matching yearly population (relies on both tables sharing year order)
main_analysis_data %>% 
  group_by(year(Time_Period_Start)) %>% 
  summarise(sum_deaths = sum(imputed_deaths)*100000) %>% 
  mutate(sum_deaths/total_pop$`sum(pop)`)
## # A tibble: 20 x 3
##    `year(Time_Period_Start)`  sum_deaths `sum_deaths/total_pop$\`sum(pop)\``
##  *                     <dbl>       <dbl>                               <dbl>
##  1                      2000 1151390000                                 5.52
##  2                      2001 1276465000                                 6.03
##  3                      2002 1614890000                                 7.54
##  4                      2003 1799140000.                                8.31
##  5                      2004 1953250000                                 8.92
##  6                      2005 2216225000                                10.0 
##  7                      2006 2603525000                                11.6 
##  8                      2007 2730800000                                12.0 
##  9                      2008 2787500000                                12.1 
## 10                      2009 2848100000                                12.3 
## 11                      2010 2969200000                                12.7 
## 12                      2011 3276800000                                13.9 
## 13                      2012 3291600000                                13.8 
## 14                      2013 3541000000                                14.7 
## 15                      2014 3847600000                                15.8 
## 16                      2015 4381900000                                17.9 
## 17                      2016 5433100000                                21.9 
## 18                      2017 6084700000                                24.4 
## 19                      2018 5847900000                                23.2 
## 20                      2019 6166500000                                24.3
#state-level change in the yearly death rate between the first and last year,
#keeping only the states with the smallest and largest change
#NOTE: death_rate[1] and death_rate[20] index the 1st and 20th year within each
#state, which assumes 20 complete years (2000-2019) per state in the panel
main_analysis_data %>%
  group_by(State, year(Time_Period_Start)) %>%
  summarise(death_rate = (sum(imputed_deaths)/unique(population))*100000) %>%
  group_by(State) %>%
  summarise(first_death_rate = death_rate[1],
            last_death_rate = death_rate[20]) %>%
  mutate(range_death_rate = last_death_rate - first_death_rate) %>% 
  filter(range_death_rate == min(range_death_rate) | range_death_rate == max(range_death_rate))
## # A tibble: 2 x 4
##   State         first_death_rate last_death_rate range_death_rate
##   <chr>                    <dbl>           <dbl>            <dbl>
## 1 Nebraska                  1.67            8.40             6.74
## 2 West Virginia             3.98           57.6             53.6
# #summarize the DIH dates
# main_analysis_data %>% 
#   group_by(Time_Period_Start) %>%
#   summarise(prop_w_intervention = mean(Intervention_Redefined > 0)) %>%
#   View()

#create a data frame to compare the fitted probability of overdose death against the
#observed values, as a visual goodness-of-fit check
#fitted(main_analysis_model) is aligned row-for-row with main_analysis_data (the model's
#input), so the former per-state loop was just a partitioned copy -- assign directly
#NOTE(review): this alignment assumes gam() dropped no rows (no NAs) -- confirm
main_analysis_plot_table<-data.frame(
  State = main_analysis_data$State,
  Fitted = fitted(main_analysis_model),
  #observed probability of overdose death = deaths / population
  Observed = main_analysis_data$imputed_deaths/main_analysis_data$population,
  Time = main_analysis_data$Time_Period_ID,
  Time_Date = main_analysis_data$Time_Period_Start,
  Intervention_Date = main_analysis_data$Intervention_First_Date
)
#goodness-of-fit plot: observed vs model-estimated death rates per 100,000,
#faceted by state, with a vertical line at each state's intervention date
# pdf("./Figures/GAM_fitted_vs_actual_by_Region_9_6_21_with_int_date_full_data.pdf")
ggplot(main_analysis_plot_table) +
  geom_line(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed")) +
  geom_point(aes(x = Time_Date, y = Observed*100000, color = "Observed"),
             size = .5, alpha = .5) +
  geom_line(aes(x = Time_Date, y = Fitted*100000, group = 1, color = "Estimate")) +
  geom_point(aes(x = Time_Date, y = Fitted*100000, color = "Estimate"),
             size = .5, alpha = .5) +
  geom_vline(aes(xintercept = Intervention_Date)) +
  #named values pin the colors to the legend labels explicitly
  scale_color_manual(values = c(Estimate = "pink", Observed = "blue")) +
  facet_wrap(facets = vars(State), scales = "free_y", ncol = 5) +
  theme(axis.text.x = element_text(hjust = 1, size = 6, family = "Times"),
        axis.text.y = element_text(size = 6, family = "Times"),
        axis.title=element_text(size = 10,face = "bold", family = "Times"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(size=8),
        panel.background = element_rect("white"),
        legend.position = "bottom") +
  labs(x = "Year", y = "Unintentional Drug Overdose Death Rates per 100,000 People",
       color = "")

# dev.off()
#diagnostic plot: deviance residuals against the logit of the fitted
#probabilities (i.e., the model's linear predictor scale)
residTab <- data.frame(
  logit_fitted_vals = logit(fitted(main_analysis_model)),
  resids = residuals(main_analysis_model)
)
# pdf("./Figures/deviance_resids_9_6_21.pdf")
ggplot(residTab, aes(x = logit_fitted_vals, y = resids)) +
  geom_point() +
  theme(text = element_text(size = 10, family = "Times"),
        title = element_text(size = 10, family = "Times", face = "bold"),
        panel.background = element_rect(fill = "white", color = "black")) +
  labs(x = "Logistic Function of Fitted Values", y = "Deviance Residuals")

# dev.off()

#binned residual plot (arm::binnedplot): residuals are averaged within bins of the
#predictor; most bin averages should fall inside the plotted bands
#NOTE(review): predict() returns the linear predictor (logit scale) by default while
#the residuals here are on the response scale -- confirm this mix is intended
pred_vals <- predict(main_analysis_model)
resids <- resid(main_analysis_model, type = "response")
# pdf("./Figures/binned_resids_plot_9_6_21.pdf")
par(font.lab = 2)   #bold axis labels for the base-graphics plot
par(family = "Times")
binnedplot(pred_vals, resids, main = "", 
           xlab = "Average Logistic Function of Fitted Values",
           ylab = "Average Residuals")

# dev.off()

3.4 Compile Results

############################## Main Analysis: Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#(row names carry the coefficient names from the fitted gam object)
main_analysis_full_table<-data.frame(coef(main_analysis_model))
#check to see how the table looks
head(main_analysis_full_table)
##                 coef.main_analysis_model.
## (Intercept)                   -9.91068390
## StateAlaska                    0.27712443
## StateArizona                   0.31458505
## StateArkansas                 -0.39014795
## StateCalifornia               -0.15908174
## StateColorado                  0.09639187
#rename the single column to "Coefficient_Estimate"
colnames(main_analysis_full_table)<-c("Coefficient_Estimate")

#vector of the policy covariates as they are named in the model formula
#NOTE: later code renames the covariate rows of the results table by position,
#so the order of this vector must stay in sync with that renaming
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined", 
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#covariates keep their names; "State<Name>" rows become "<Name>"; the intercept is
#labeled with the baseline state; smooth terms get a readable "Smoothed Time" label
#(vectorized startsWith/substring replaces the previous per-row substr() loop)
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
row_labels<-rownames(main_analysis_full_table)
keep_as_is<-row_labels %in% covariates
is_state<-!keep_as_is & startsWith(row_labels, "State")
is_smooth<-!keep_as_is & startsWith(row_labels, smooth_prefix)
#state effects look like "StateMassachusetts": drop the leading "State"
row_labels[is_state]<-substring(row_labels[is_state], 6)
#the intercept corresponds to the baseline state (Alabama)
row_labels[row_labels == "(Intercept)"]<-"Intercept/Alabama"
#smooth terms look like "s(Time_Period_ID):as.factor(Region)West.3":
#keep only the region suffix after the common prefix
row_labels[is_smooth]<-paste0("Smoothed Time for Region ",
                              substring(row_labels[is_smooth], nchar(smooth_prefix) + 1))
rownames(main_analysis_full_table)<-row_labels

#confidence intervals for the coefficients
#summary.gam() is expensive, so call it once and reuse its pieces below
#(the previous code recomputed it four times)
main_analysis_summary<-summary(main_analysis_model)
main_analysis_full_table$Coefficient_Lower_Bound<-main_analysis_full_table$Coefficient_Estimate - 
  1.96*main_analysis_summary$se
main_analysis_full_table$Coefficient_Upper_Bound<-main_analysis_full_table$Coefficient_Estimate + 
  1.96*main_analysis_summary$se

#the estimates and confidence intervals on the odds ratio scale
main_analysis_full_table$Odds_Ratio<-exp(main_analysis_full_table$Coefficient_Estimate)
main_analysis_full_table$Odds_Ratio_LB<-exp(main_analysis_full_table$Coefficient_Lower_Bound)
main_analysis_full_table$Odds_Ratio_UB<-exp(main_analysis_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
main_analysis_full_table$Standard_Error<-main_analysis_summary$se
#p-values ($p.pv) exist only for the parametric terms, which come first in coef(),
#so the smoothed time effect rows are padded with NA
main_analysis_full_table$p_value<-c(main_analysis_summary$p.pv,
                                    rep(NA, length(coef(main_analysis_model)) -
                                          length(main_analysis_summary$p.pv)))

head(main_analysis_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.91068390             -10.0166188
## Alaska                      0.27712443               0.2230357
## Arizona                     0.31458505               0.2872618
## Arkansas                   -0.39014795              -0.4286650
## California                 -0.15908174              -0.1850903
## Colorado                    0.09639187               0.0649615
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.8047490 4.964147e-05  4.465167e-05
## Alaska                          0.3312131 1.319331e+00  1.249865e+00
## Arizona                         0.3419083 1.369691e+00  1.332773e+00
## Arkansas                       -0.3516309 6.769567e-01  6.513781e-01
## California                     -0.1330732 8.529266e-01  8.310292e-01
## Colorado                        0.1278222 1.101191e+00  1.067118e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.518888e-05     0.05404841  0.000000e+00
## Alaska             1.392657e+00     0.02759627  9.953454e-24
## Arizona            1.407631e+00     0.01394044 9.275181e-113
## Arkansas           7.035397e-01     0.01965153  1.031808e-87
## California         8.754010e-01     0.01326969  4.089372e-33
## Colorado           1.136351e+00     0.01603591  1.843791e-09
tail(main_analysis_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2352866               0.2060029
## Smoothed Time for Region West.5            0.1835194               0.1298088
## Smoothed Time for Region West.6            0.1609894               0.0862950
## Smoothed Time for Region West.7            0.2035462               0.1160603
## Smoothed Time for Region West.8            0.3083070               0.2015524
## Smoothed Time for Region West.9            0.4223217               0.3307914
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2645704   1.265271
## Smoothed Time for Region West.5               0.2372300   1.201438
## Smoothed Time for Region West.6               0.2356839   1.174673
## Smoothed Time for Region West.7               0.2910320   1.225742
## Smoothed Time for Region West.8               0.4150616   1.361119
## Smoothed Time for Region West.9               0.5138520   1.525499
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.228757      1.302871     0.01494068
## Smoothed Time for Region West.5      1.138611      1.267733     0.02740337
## Smoothed Time for Region West.6      1.090128      1.265774     0.03810940
## Smoothed Time for Region West.7      1.123064      1.337807     0.04463562
## Smoothed Time for Region West.8      1.223300      1.514464     0.05446662
## Smoothed Time for Region West.9      1.392069      1.671718     0.04669916
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the table into a CSV
# write.csv(round(main_analysis_full_table,5), "./Data/coefficients_GAM_9_6_21_full_data_uninentional_od.csv")

#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(main_analysis_full_table) %in% covariates)
main_analysis_covariate_table<-(round(main_analysis_full_table[covariate_Index,], 5))

#rename the variables so that the table looks cleaner
#NOTE(review): this renaming is positional -- it assumes the covariate rows appear
#in the same order as the `covariates` vector; confirm against the model formula
rownames(main_analysis_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                           "Naloxone_Pharmacy_No",
                                           "Medical_Marijuana",
                                           "Recreational_Marijuana",
                                           "GSL", 
                                           "PDMP", 
                                           "Medicaid_Expansion",
                                           "Intervention", 
                                           "Number of States with DIH Prosecutions")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
main_analysis_covariate_table<-rbind(main_analysis_covariate_table, 
                                     main_analysis_full_table[-covariate_Index,])
#remove the columns that aren't in odds ratio scale
main_analysis_covariate_table<-main_analysis_covariate_table[,-which(colnames(main_analysis_covariate_table) %in%
                                                                       c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                                                                         "Coefficient_Upper_Bound", "Standard_Error"))]

#NOTE(review): these are odds ratios from a logistic model relabeled as risk
#ratios -- presumably relying on the rare-outcome approximation; confirm
colnames(main_analysis_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(main_analysis_covariate_table, 10)
##                                        Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                          9.761800e-01 9.614500e-01
## Naloxone_Pharmacy_No                           1.008440e+00 9.956600e-01
## Medical_Marijuana                              1.065010e+00 1.053270e+00
## Recreational_Marijuana                         9.638900e-01 9.477300e-01
## GSL                                            1.034430e+00 1.023090e+00
## PDMP                                           9.806800e-01 9.695800e-01
## Medicaid_Expansion                             1.104180e+00 1.091150e+00
## Intervention                                   1.063780e+00 1.052640e+00
## Number of States with DIH Prosecutions         1.003700e+00 1.000080e+00
## Intercept/Alabama                              4.964147e-05 4.465167e-05
##                                         RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes                  9.911300e-01 0.00188
## Naloxone_Pharmacy_No                   1.021380e+00 0.19663
## Medical_Marijuana                      1.076870e+00 0.00000
## Recreational_Marijuana                 9.803300e-01 0.00002
## GSL                                    1.045900e+00 0.00000
## PDMP                                   9.919000e-01 0.00078
## Medicaid_Expansion                     1.117380e+00 0.00000
## Intervention                           1.075040e+00 0.00000
## Number of States with DIH Prosecutions 1.007330e+00 0.04512
## Intercept/Alabama                      5.518888e-05 0.00000
#save the table into a CSV
# write.csv(round(main_analysis_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_unintentional_od.csv")

3.5 Attributable Deaths

############################## Main Analysis: Number of Overdose Deaths Attributed to Intervention ###############################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_main_analysis<-main_analysis_data[which(main_analysis_data$Intervention_Redefined>0),]

#helper: counterfactual probability of overdose had the intervention not occurred,
#for a given value of the intervention coefficient -- remove the intervention's
#contribution on the logit scale, then map back to a probability with expit()
#(one definition replaces the three near-identical expressions that were here)
cf_prob_no_intervention<-function(intervention_coef){
  expit(-intervention_coef*attr_deaths_anlys_main_analysis$Intervention_Redefined
        + logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))
}

#probability of overdose had intervention not occurred, at the point estimate
prob_od_no_int_main_analysis<-cf_prob_no_intervention(coef(main_analysis_model)["Intervention_Redefined"])

#95% CI bounds of all coefficients, from the model's standard errors
coef_lb<-coef(main_analysis_model) - 1.96*summary(main_analysis_model)$se
coef_ub<-coef(main_analysis_model) + 1.96*summary(main_analysis_model)$se

#bounds of the counterfactual probability, using the lower and upper bounds of
#the intervention coefficient (these propagate to the LB/UB of attributable deaths)
prob_od_no_int_LB_main_analysis<-cf_prob_no_intervention(coef_lb[names(coef_lb) == "Intervention_Redefined"])
prob_od_no_int_UB_main_analysis<-cf_prob_no_intervention(coef_ub[names(coef_ub) == "Intervention_Redefined"])


#estimate the number of deaths attributable to the intervention at each time period
time_points<-sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID))

#attributable deaths at one time point = observed deaths with the intervention -
#expected deaths under the given counterfactual (no-intervention) probability
attr_deaths_at_time<-function(time, cf_prob){
  time_point_index<-which(attr_deaths_anlys_main_analysis$Time_Period_ID == time)
  sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
      - cf_prob[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
}

#vapply over the sorted time points replaces the previous manual index-counter loop;
#the LB/UB vectors use the bounds of the counterfactual probability
num_attr_od<-vapply(time_points, attr_deaths_at_time, numeric(1),
                    cf_prob = prob_od_no_int_main_analysis)
num_attr_od_LB<-vapply(time_points, attr_deaths_at_time, numeric(1),
                       cf_prob = prob_od_no_int_LB_main_analysis)
num_attr_od_UB<-vapply(time_points, attr_deaths_at_time, numeric(1),
                       cf_prob = prob_od_no_int_UB_main_analysis)

#assemble per-time-period attributable death estimates with their 95% bounds
#NOTE(review): pairing sort(unique(ID)) with sort(unique(Start)) assumes the time
#period IDs and start dates share the same ordering and multiplicity -- looks true
#by construction, but confirm against the data set
num_attr_od_main_analysis<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID)),
                                      "Time_Start" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_Start)),
                                      "Num_Attr_Deaths" = num_attr_od,
                                      "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                      "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_main_analysis$Num_Attr_Deaths)
## [1] 33114.98
summary(num_attr_od_main_analysis$Num_Attr_Deaths)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    9.971  355.713  702.014  827.874 1154.626 1931.467
#ensure Time_Start is Date class for the yearly grouping below
#(a no-op if the dates were already converted upstream)
num_attr_od_main_analysis$Time_Start<-as.Date(num_attr_od_main_analysis$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_main_analysis$Num_Attr_Deaths_LB)
## [1] 27619.4
sum(num_attr_od_main_analysis$Num_Attr_Deaths_UB)
## [1] 38553.26
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_main_analysis<-num_attr_od_main_analysis %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_main_analysis$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.82  753.33 1409.54 1655.75 2306.53 3684.37
summary(yearly_num_Attr_Deaths_main_analysis$death_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   24.85  628.27 1175.62 1380.97 1923.79 3073.00
summary(yearly_num_Attr_Deaths_main_analysis$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   34.74  877.10 1641.02 1927.66 2685.28 4289.35

4 Secondary Analysis: Effect of At Least One DIH Prosecution Report in Media on All Overdose Deaths

4.1 Analysis

############################## Secondary Analysis: All Overdose Deaths ###############################
#read the all-overdose data set and apply the same preparation as the main analysis
od_all_data <- read.csv("./Data/full_data_set_9_7_21_all_od.csv")
colnames(od_all_data)[which(colnames(od_all_data) == "sum_deaths")] <- "imputed_deaths"
od_all_data$Time_Period_Start <- as.Date(od_all_data$Time_Period_Start)
od_all_data$Intervention_First_Date <- as.Date(od_all_data$Intervention_First_Date)

#assign Census regions: default to "West", then overwrite the other three regions
#(vectorized %in% lookups replace the previous per-state loop)
od_all_data$Region <- "West"
od_all_data$Region[od_all_data$State %in% region.list$Northeast] <- "Northeast"
od_all_data$Region[od_all_data$State %in% region.list$Midwest] <- "Midwest"
od_all_data$Region[od_all_data$State %in% region.list$South] <- "South"

#compute the number of states with intervention at each time period by adding up
#the intervention indicator within the period
od_all_data <- od_all_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention = sum(Intervention_Redefined))

#model for the secondary analysis (all overdose deaths, not just unintentional)
#cr is used for cubic regression spline -- we are smoothing time effects by region
#binomial outcome per state-period: cbind(deaths, survivors), with state fixed
#effects, region-specific smooth time trends, and the policy covariates
#run the analysis for all the states
od_all_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                    s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                    Naloxone_Pharmacy_Yes_Redefined +
                    Naloxone_Pharmacy_No_Redefined +
                    Medical_Marijuana_Redefined +
                    Recreational_Marijuana_Redefined +
                    GSL_Redefined +
                    PDMP_Redefined +
                    Medicaid_Expansion_Redefined +
                    Intervention_Redefined + 
                    num_states_w_intervention,
                  data = od_all_data, family = "binomial")

#summary output of the model (stargazer renders an HTML coefficient table)
stargazer(od_all_model, type = "html", dep.var.labels = "All Overdose Deaths")
Dependent variable:
All Overdose Deaths
StateAlaska 0.223***
(0.024)
StateArizona 0.321***
(0.012)
StateArkansas -0.123***
(0.016)
StateCalifornia -0.143***
(0.011)
StateColorado 0.163***
(0.014)
StateConnecticut 0.134***
(0.014)
StateDelaware 0.367***
(0.020)
StateFlorida 0.256***
(0.010)
StateGeorgia -0.123***
(0.012)
StateHawaii -0.107***
(0.021)
StateIdaho -0.028
(0.019)
StateIllinois -0.091***
(0.011)
StateIndiana 0.135***
(0.012)
StateIowa -0.590***
(0.018)
StateKansas -0.205***
(0.016)
StateKentucky 0.519***
(0.012)
StateLouisiana 0.271***
(0.012)
StateMaine 0.152***
(0.019)
StateMaryland 0.371***
(0.012)
StateMassachusetts 0.307***
(0.012)
StateMichigan 0.259***
(0.011)
StateMinnesota -0.455***
(0.015)
StateMississippi -0.145***
(0.016)
StateMissouri 0.201***
(0.013)
StateMontana -0.020
(0.023)
StateNebraska -0.630***
(0.023)
StateNevada 0.426***
(0.014)
StateNew Hampshire 0.257***
(0.018)
StateNew Jersey 0.046***
(0.012)
StateNew Mexico 0.561***
(0.015)
StateNew York -0.205***
(0.011)
StateNorth Carolina 0.125***
(0.011)
StateNorth Dakota -0.875***
(0.037)
StateOhio 0.374***
(0.011)
StateOklahoma 0.307***
(0.013)
StateOregon 0.109***
(0.014)
StatePennsylvania 0.364***
(0.011)
StateRhode Island 0.319***
(0.019)
StateSouth Carolina 0.126***
(0.013)
StateSouth Dakota -0.694***
(0.033)
StateTennessee 0.396***
(0.011)
StateTexas -0.249***
(0.011)
StateUtah 0.457***
(0.014)
StateVermont -0.021
(0.026)
StateVirginia -0.146***
(0.012)
StateWashington 0.131***
(0.013)
StateWest Virginia 0.770***
(0.014)
StateWisconsin -0.016
(0.013)
StateWyoming 0.061**
(0.028)
Naloxone_Pharmacy_Yes_Redefined 0.009
(0.007)
Naloxone_Pharmacy_No_Redefined 0.005
(0.006)
Medical_Marijuana_Redefined 0.041***
(0.005)
Recreational_Marijuana_Redefined -0.065***
(0.008)
GSL_Redefined 0.011**
(0.005)
PDMP_Redefined 0.016***
(0.005)
Medicaid_Expansion_Redefined 0.112***
(0.005)
Intervention_Redefined 0.020***
(0.005)
num_states_w_intervention 0.014***
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -10.737***
(0.060)
Observations 2,000
Adjusted R2 0.900
Log Likelihood -15,832.420
UBRE 7.507
Note: *p<0.1; **p<0.05; ***p<0.01
# gam.check(od_all_model)

4.2 Plots

#create a data frame comparing the fitted probability of overdose death against the
#observed values, as a visual goodness-of-fit check for the secondary analysis
#fitted(od_all_model) is aligned row-for-row with od_all_data, so the former
#per-state loop was just a partitioned copy -- assign the columns directly
#NOTE(review): this alignment assumes gam() dropped no rows (no NAs) -- confirm
od_all_plot_table<-data.frame(
  State = od_all_data$State,
  Fitted = fitted(od_all_model),
  #observed probability of overdose death = deaths / population
  Observed = od_all_data$imputed_deaths/od_all_data$population,
  Time = od_all_data$Time_Period_ID,
  Time_Date = od_all_data$Time_Period_Start,
  Intervention_Date = od_all_data$Intervention_First_Date
)
#plot to compare the fitted values vs observed deaths (rates per 100,000),
#faceted by state, with a vertical line at each state's intervention date
ggplot(data = od_all_plot_table) + 
  geom_point(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"), size = 0.5, alpha = 0.5) +
  geom_line(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"))+
  geom_point(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate"), size = 0.5, alpha = 0.5) + 
  geom_line(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate")) +
  geom_vline(od_all_plot_table, mapping = aes(xintercept = Intervention_Date)) +
  facet_wrap(facets = vars(State)) +
  #colors map alphabetically: Estimate -> pink, Observed -> blue
  scale_color_manual(values = c("pink", "blue")) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7), axis.text.y = element_text(size = 7),
        axis.title=element_text(size = 15,face = "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        strip.text = element_text(size=8),panel.background = element_rect("white"),
        legend.position = "bottom") +
  labs(x = "Year", y = "Drug Overdose Death Rates per 100,000 People", color = "")

#plot the smoothed time effects by region, all on one page
plot(od_all_model, ylab = "Smoothed Time Effects", xlab = "Time", pages = 1)

#diagnostic plots to check model
#gam.check reports convergence information and basis-dimension (k) diagnostics;
#a k-index near/above 1 with a high p-value suggests the basis size is adequate
gam.check(od_all_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-6.991359e-08,1.968549e-06]
## (score 7.507021 & scale 1).
## Hessian positive definite, eigenvalue range [3.584438e-05,0.000246841].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.86    1.04    0.98
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.97    1.04    0.96
## s(Time_Period_ID):as.factor(Region)South     9.00 8.89    1.04    0.95
## s(Time_Period_ID):as.factor(Region)West      9.00 8.70    1.04    0.95
#deviance residuals against the linear predictor (logit of fitted probabilities)
plot(logit(fitted(od_all_model)), residuals(od_all_model),
     xlab = "Logit(Fitted Values)", ylab = "Deviance Residuals" )

4.3 Compile Results

############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#(row names carry the coefficient names from the fitted gam object)
od_all_full_table<-data.frame(coef(od_all_model))
#check to see how the table looks
head(od_all_full_table)
##                 coef.od_all_model.
## (Intercept)            -10.7371006
## StateAlaska              0.2226814
## StateArizona             0.3208871
## StateArkansas           -0.1230679
## StateCalifornia         -0.1431041
## StateColorado            0.1629195
#rename the single column to "Coefficient_Estimate"
colnames(od_all_full_table)<-c("Coefficient_Estimate")

#vector of the policy covariates as they are named in the model formula
#(same definition as in the main analysis, repeated so this section is self-contained;
#keep the order in sync with any positional renaming of the covariate rows)
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined", 
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#covariates keep their names; "State<Name>" rows become "<Name>"; the intercept is
#labeled with the baseline state; smooth terms get a readable "Smoothed Time" label
#(vectorized startsWith/substring replaces the previous per-row substr() loop)
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
row_labels<-rownames(od_all_full_table)
keep_as_is<-row_labels %in% covariates
is_state<-!keep_as_is & startsWith(row_labels, "State")
is_smooth<-!keep_as_is & startsWith(row_labels, smooth_prefix)
#state effects look like "StateMassachusetts": drop the leading "State"
row_labels[is_state]<-substring(row_labels[is_state], 6)
#the intercept corresponds to the baseline state (Alabama)
row_labels[row_labels == "(Intercept)"]<-"Intercept/Alabama"
#smooth terms look like "s(Time_Period_ID):as.factor(Region)West.3":
#keep only the region suffix after the common prefix
row_labels[is_smooth]<-paste0("Smoothed Time for Region ",
                              substring(row_labels[is_smooth], nchar(smooth_prefix) + 1))
rownames(od_all_full_table)<-row_labels

#confidence intervals for the coefficients
#hoist the model summary once: $se holds one standard error per coefficient,
#aligned with coef() order (smooth-term coefficients included)
od_all_summary <- summary(od_all_model)

#Wald 95% CI on the log-odds scale
od_all_full_table$Coefficient_Lower_Bound <- od_all_full_table$Coefficient_Estimate - 1.96*od_all_summary$se
od_all_full_table$Coefficient_Upper_Bound <- od_all_full_table$Coefficient_Estimate + 1.96*od_all_summary$se

#point estimates and CI endpoints on the odds-ratio scale
od_all_full_table$Odds_Ratio <- exp(od_all_full_table$Coefficient_Estimate)
od_all_full_table$Odds_Ratio_LB <- exp(od_all_full_table$Coefficient_Lower_Bound)
od_all_full_table$Odds_Ratio_UB <- exp(od_all_full_table$Coefficient_Upper_Bound)

#standard errors and p-values; the smoothed time effect rows get NA p-values
#because the summary only reports p-values for the parametric terms
od_all_full_table$Standard_Error <- od_all_summary$se
od_all_full_table$p_value <- c(od_all_summary$p.pv,
                               rep(NA, length(coef(od_all_model)) -
                                     length(od_all_summary$p.pv)))

#inspect the top of the table (intercept and state fixed effects come first)
head(od_all_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -10.7371006             -10.8556442
## Alaska                       0.2226814               0.1749174
## Arizona                      0.3208871               0.2973683
## Arkansas                    -0.1230679              -0.1537843
## California                  -0.1431041              -0.1655486
## Colorado                     0.1629195               0.1359987
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama            -10.61855696 2.172383e-05  1.929539e-05
## Alaska                         0.27044533 1.249422e+00  1.191148e+00
## Arizona                        0.34440588 1.378350e+00  1.346311e+00
## Arkansas                      -0.09235162 8.842036e-01  8.574570e-01
## California                    -0.12065954 8.666638e-01  8.474286e-01
## Colorado                       0.18984025 1.176942e+00  1.145680e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  2.445791e-05     0.06048143  0.000000e+00
## Alaska             1.310548e+00     0.02436937  6.376260e-20
## Arizona            1.411151e+00     0.01199937 1.530388e-157
## Arkansas           9.117845e-01     0.01567159  4.064260e-15
## California         8.863357e-01     0.01145130  7.776242e-36
## Colorado           1.209056e+00     0.01373510  1.874870e-32
#...and the bottom (region-specific smoothed time effects; p-values are NA there)
tail(od_all_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4          0.123135742              0.09293469
## Smoothed Time for Region West.5         -0.008299928             -0.06586925
## Smoothed Time for Region West.6         -0.085185636             -0.16641537
## Smoothed Time for Region West.7         -0.120884114             -0.21682692
## Smoothed Time for Region West.8         -0.091545818             -0.20901973
## Smoothed Time for Region West.9          0.044134671             -0.05495089
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4             0.153336799  1.1310379
## Smoothed Time for Region West.5             0.049269391  0.9917344
## Smoothed Time for Region West.6            -0.003955896  0.9183418
## Smoothed Time for Region West.7            -0.024941308  0.8861366
## Smoothed Time for Region West.8             0.025928091  0.9125195
## Smoothed Time for Region West.9             0.143220234  1.0451231
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4     1.0973901     1.1657175     0.01540870
## Smoothed Time for Region West.5     0.9362533     1.0505033     0.02937210
## Smoothed Time for Region West.6     0.8466945     0.9960519     0.04144374
## Smoothed Time for Region West.7     0.8050693     0.9753672     0.04895041
## Smoothed Time for Region West.8     0.8113792     1.0262671     0.05993567
## Smoothed Time for Region West.9     0.9465316     1.1539839     0.05055386
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the full coefficient table into a CSV (left commented out so a re-knit does not overwrite it)
# write.csv(round(od_all_full_table,5), "./Data/coefficients_GAM_9_1_20_full_data_all_od.csv")

#export a table with just the covariates
#locate the covariate rows in the full coefficient table
covariate_Index<-which(rownames(od_all_full_table) %in% covariates)
od_all_covariate_table<-round(od_all_full_table[covariate_Index,], 5)

#human-readable labels for the covariate rows
rownames(od_all_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                    "Naloxone_Pharmacy_No",
                                    "Medical_Marijuana",
                                    "Recreational_Marijuana",
                                    "GSL", 
                                    "PDMP", 
                                    "Medicaid_Expansion",
                                    "Intervention", 
                                    "Number of States with DIH Prosecution")

#reorganize: covariates on top, the remaining (unrounded) coefficients beneath
od_all_covariate_table<-rbind(od_all_covariate_table, od_all_full_table[-covariate_Index,])

#keep only the odds-ratio-scale columns and the p-value
log_scale_cols<-c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                  "Coefficient_Upper_Bound", "Standard_Error")
od_all_covariate_table<-od_all_covariate_table[, !(colnames(od_all_covariate_table) %in% log_scale_cols)]

#relabel the retained columns (odds ratios reported here as risk ratios --
#presumably via the rare-outcome approximation; confirm with the authors)
colnames(od_all_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
#show the covariate rows (plus the first two state rows) of the exported table
head(od_all_covariate_table, 11)
##                                       Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                         1.008540e+00 9.950000e-01
## Naloxone_Pharmacy_No                          1.004760e+00 9.937600e-01
## Medical_Marijuana                             1.042010e+00 1.032090e+00
## Recreational_Marijuana                        9.371300e-01 9.234400e-01
## GSL                                           1.011500e+00 1.001890e+00
## PDMP                                          1.015620e+00 1.006110e+00
## Medicaid_Expansion                            1.118740e+00 1.107440e+00
## Intervention                                  1.019820e+00 1.010750e+00
## Number of States with DIH Prosecution         1.013820e+00 1.009740e+00
## Intercept/Alabama                             2.172383e-05 1.929539e-05
## Alaska                                        1.249422e+00 1.191148e+00
##                                        RR_95_CI_UB     p-value
## Naloxone_Pharmacy_Yes                 1.022250e+00 2.17470e-01
## Naloxone_Pharmacy_No                  1.015880e+00 3.97430e-01
## Medical_Marijuana                     1.052020e+00 0.00000e+00
## Recreational_Marijuana                9.510200e-01 0.00000e+00
## GSL                                   1.021190e+00 1.88600e-02
## PDMP                                  1.025220e+00 1.24000e-03
## Medicaid_Expansion                    1.130140e+00 0.00000e+00
## Intervention                          1.028970e+00 2.00000e-05
## Number of States with DIH Prosecution 1.017910e+00 0.00000e+00
## Intercept/Alabama                     2.445791e-05 0.00000e+00
## Alaska                                1.310548e+00 6.37626e-20
#save the covariate table into a CSV (left commented out so a re-knit does not overwrite it)
# write.csv(round(od_all_covariate_table, 5), "./Data/coefficients_covariates_9_6_21_full_data_all_od.csv")

4.4 Attributable Deaths

############################## Number of Overdose Deaths Attributed to Intervention ###############################
#estimate the number of deaths attributable to the intervention
#restrict to the time points at which at least one state had the intervention in effect
attr_deaths_anlys_od_all<-od_all_data[which(od_all_data$Intervention_Redefined>0),]

#counterfactual probability of overdose had the intervention not occurred:
#remove the intervention term on the logit scale, then invert with expit()
observed_logit<-logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population)
intervention_coef<-coef(od_all_model)["Intervention_Redefined"]
prob_od_no_int_od_all<-expit(observed_logit - intervention_coef*attr_deaths_anlys_od_all$Intervention_Redefined)

#Wald 95% CI endpoints for every model coefficient
coef_lb<-coef(od_all_model) - 1.96*summary(od_all_model)$se
coef_ub<-coef(od_all_model) + 1.96*summary(od_all_model)$se

#counterfactual probabilities at the lower/upper CI endpoint of the intervention coefficient
intervention_coef_lb<-coef_lb[names(coef_lb) == "Intervention_Redefined"]
intervention_coef_ub<-coef_ub[names(coef_ub) == "Intervention_Redefined"]
prob_od_no_int_LB_od_all<-expit(observed_logit - intervention_coef_lb*attr_deaths_anlys_od_all$Intervention_Redefined)
prob_od_no_int_UB_od_all<-expit(observed_logit - intervention_coef_ub*attr_deaths_anlys_od_all$Intervention_Redefined)

#pre-allocate one slot per intervention-era time period
n_periods<-length(unique(attr_deaths_anlys_od_all$Time_Period_ID))
num_attr_od<-rep(NA, n_periods)
num_attr_od_LB<-rep(NA, n_periods)
num_attr_od_UB<-rep(NA, n_periods)


#for each intervention-era time period, compare the observed number of deaths with
#the expected number under the no-intervention counterfactual
#(seq_along over the sorted time points replaces the original hand-incremented
# index counter, and sort(unique(...)) is computed once instead of twice)
time_points<-sort(unique(attr_deaths_anlys_od_all$Time_Period_ID))

for(index in seq_along(time_points)){
  #rows of the attributable-deaths data belonging to this time period
  time_point_index<-which(attr_deaths_anlys_od_all$Time_Period_ID == time_points[index])
  obs_deaths<-attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
  pop<-attr_deaths_anlys_od_all$population[time_point_index]

  #attributable deaths = observed deaths - estimated deaths had intervention not occurred
  num_attr_od[index]<-sum(obs_deaths - prob_od_no_int_od_all[time_point_index]*pop)
  #bounds propagated from the 95% CI of the intervention coefficient
  num_attr_od_LB[index]<-sum(obs_deaths - prob_od_no_int_LB_od_all[time_point_index]*pop)
  num_attr_od_UB[index]<-sum(obs_deaths - prob_od_no_int_UB_od_all[time_point_index]*pop)
}

num_attr_od_od_all<-data.frame("Time_Period_ID" = time_points,
                               #NOTE(review): sorting the start dates independently of the IDs
                               #assumes both sort in the same (chronological) order -- holds if
                               #period IDs are assigned chronologically; confirm
                               "Time_Start" = sort(unique(attr_deaths_anlys_od_all$Time_Period_Start)),
                               "Num_Attr_Deaths" = num_attr_od,
                               "Num_Attr_Deaths_LB" = num_attr_od_LB,
                               "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
#(the printed value below is from this run; an earlier draft's trailing note of 16336.4 was stale)
sum(num_attr_od_od_all$Num_Attr_Deaths)
## [1] 13948.84
summary(num_attr_od_od_all$Num_Attr_Deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.022 175.954 309.809 348.721 476.751 742.891
#convert the period start back to a Date object for grouping by calendar year below
num_attr_od_od_all$Time_Start<-as.Date(num_attr_od_od_all$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_od_all$Num_Attr_Deaths_LB)
## [1] 7631.079
sum(num_attr_od_od_all$Num_Attr_Deaths_UB)
## [1] 20210.63
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_od_all<-num_attr_od_od_all %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_od_all$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.87  373.89  620.48  697.44  952.41 1428.40
summary(yearly_num_Attr_Deaths_od_all$death_lb)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.131 204.535 339.450 381.554 521.050 781.458
summary(yearly_num_Attr_Deaths_od_all$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.57  541.77  899.02 1010.53 1379.94 2069.59

5 Sensitivity Analysis 1: Confounding by Indication

5.1 Analysis

############################## Sensitivity Analysis 1: Confounding by Indication Analysis ###############################
#create a variable that is equal to 1 just before intervention
#initialize the column to all zeros
main_analysis_data$justBeforeIntervention<-0

#for each state, flag the time period immediately before the first intervention period
#(assumes each state's rows are ordered by time, same as the original code)
for(state in unique(main_analysis_data$State)){
  state_rows<-which(main_analysis_data$State == state)
  #first time point at which the intervention is active for this state
  index_first_intervention<-which(main_analysis_data$Intervention_Redefined[state_rows]>0)[1]
  #Guard against two edge cases the original handled only implicitly:
  # - a state with no intervention at all (index is NA, e.g. Hawaii), which
  #   would make the NA subscript assignment error out
  # - a state whose intervention starts in the very first period, where the
  #   original zero-index assignment was a silent no-op (Georgia, Ohio)
  if(!is.na(index_first_intervention) && index_first_intervention > 1){
    main_analysis_data$justBeforeIntervention[state_rows[index_first_intervention - 1]]<-1
  }
}

#subset the data so that we are only looking at the periods before the intervention occurs for each state
#Collect the per-state pieces in a pre-allocated list and bind once at the end
#(avoids the quadratic cost of growing a data frame with rbind() inside a loop).
state_names<-unique(main_analysis_data$State)
pre_intervention_pieces<-vector("list", length(state_names))

for(i in seq_along(state_names)){
  state<-state_names[i]
  state_data<-main_analysis_data[main_analysis_data$State == state,]

  if(state == "Hawaii"){
    #Hawaii never has the intervention, so all of its rows are pre-intervention
    pre_intervention_pieces[[i]]<-state_data
  }else if(!(state %in% c("Georgia", "Ohio"))){
    #Georgia and Ohio are excluded because their intervention starts in 2000,
    #leaving no pre-intervention period
    #keep every row up to and including the "just before intervention" period
    #(assumes exactly one flagged row per remaining state -- TODO confirm)
    index_first_intervention<-which(state_data$justBeforeIntervention>0)
    pre_intervention_pieces[[i]]<-state_data[1:(index_first_intervention),]
  }
  #Georgia and Ohio leave NULL entries, which do.call(rbind, ...) drops
}

sensitivity_anlys_conf_by_indication_data<-do.call(rbind, pre_intervention_pieces)

#run the analysis for the sensitivity analysis
#Same binomial GAM specification as the main model (state fixed effects,
#region-specific cubic-regression smooths over time, policy covariates), but
#fit on the pre-intervention data only and with the justBeforeIntervention
#indicator in place of the intervention variable -- a non-null coefficient
#would suggest states adopted the policy in response to rising deaths
#(confounding by indication)
sensitivity_anlys_conf_by_indication<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                            s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                                            Naloxone_Pharmacy_Yes_Redefined +
                                            Naloxone_Pharmacy_No_Redefined +
                                            Medical_Marijuana_Redefined +
                                            Recreational_Marijuana_Redefined +
                                            GSL_Redefined +
                                            PDMP_Redefined +
                                            Medicaid_Expansion_Redefined +
                                            justBeforeIntervention + 
                                            num_states_w_intervention,
                                          data = sensitivity_anlys_conf_by_indication_data, family = "binomial")

#render the regression table as HTML
stargazer(sensitivity_anlys_conf_by_indication, type = "html", dep.var.labels = "Unintentional Overdose Deaths")
Dependent variable:
Unintentional Overdose Deaths
StateAlaska 0.595***
(0.045)
StateArizona 0.601***
(0.028)
StateArkansas -0.324***
(0.037)
StateCalifornia 0.058
(0.047)
StateColorado 0.395***
(0.036)
StateConnecticut 0.855***
(0.052)
StateDelaware 0.288***
(0.039)
StateFlorida 0.702***
(0.048)
StateHawaii -0.070
(0.044)
StateIdaho 0.055
(0.038)
StateIllinois 0.834***
(0.040)
StateIndiana 0.046
(0.033)
StateIowa -0.674***
(0.072)
StateKansas 0.044
(0.043)
StateKentucky 0.772***
(0.028)
StateLouisiana 0.512***
(0.041)
StateMaine 0.498***
(0.059)
StateMaryland -1.267***
(0.099)
StateMassachusetts 0.174***
(0.049)
StateMichigan 0.122***
(0.037)
StateMinnesota -0.425***
(0.042)
StateMississippi 0.122***
(0.031)
StateMissouri 0.404***
(0.037)
StateMontana -0.138
(0.091)
StateNebraska -0.805***
(0.048)
StateNevada 0.873***
(0.043)
StateNew Hampshire 0.313***
(0.055)
StateNew Jersey 0.883***
(0.054)
StateNew Mexico 1.445***
(0.040)
StateNew York 0.271***
(0.047)
StateNorth Carolina 0.450***
(0.032)
StateNorth Dakota -1.145***
(0.079)
StateOklahoma 0.667***
(0.030)
StateOregon 0.146***
(0.041)
StatePennsylvania 1.173***
(0.061)
StateRhode Island 0.113**
(0.057)
StateSouth Carolina 0.297***
(0.029)
StateSouth Dakota -1.012***
(0.065)
StateTennessee 0.570***
(0.029)
StateTexas 0.541***
(0.043)
StateUtah -0.486***
(0.064)
StateVermont 0.215***
(0.070)
StateVirginia 0.400***
(0.039)
StateWashington 0.544***
(0.038)
StateWest Virginia 0.896***
(0.030)
StateWisconsin 0.195***
(0.045)
StateWyoming 0.108*
(0.065)
Naloxone_Pharmacy_Yes_Redefined -0.063
(0.081)
Naloxone_Pharmacy_No_Redefined -0.373***
(0.029)
Medical_Marijuana_Redefined 0.049**
(0.023)
Recreational_Marijuana_Redefined -0.173
(0.127)
GSL_Redefined -0.024
(0.036)
PDMP_Redefined -0.081***
(0.015)
Medicaid_Expansion_Redefined 0.090
(0.065)
justBeforeIntervention -0.051***
(0.012)
num_states_w_intervention -0.017***
(0.004)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.941***
(0.068)
Observations 833
Adjusted R2 0.873
Log Likelihood -4,637.128
UBRE 4.048
Note: *p<0.1; **p<0.05; ***p<0.01

5.2 Compile Results

############################## Sensitivity Analysis 1: Make Data Frame of Results and 95% CI ###############################
#store the coefficients of all the terms into a table (one row per coefficient)
sensitivity_anlys_conf_by_indication_full_table<-data.frame(coef(sensitivity_anlys_conf_by_indication))
head(sensitivity_anlys_conf_by_indication_full_table)
##                 coef.sensitivity_anlys_conf_by_indication.
## (Intercept)                                    -9.94086992
## StateAlaska                                     0.59454494
## StateArizona                                    0.60071300
## StateArkansas                                  -0.32446412
## StateCalifornia                                 0.05809028
## StateColorado                                   0.39465654
#change the auto-generated column name to "Coefficient_Estimate"
colnames(sensitivity_anlys_conf_by_indication_full_table)<-c("Coefficient_Estimate")

#store a vector of the covariates
#NOTE: this overwrites the `covariates` vector used for the main analysis --
#"Intervention_Redefined" is replaced here by the "justBeforeIntervention" indicator
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "justBeforeIntervention",
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#Vectorized relabelling (replaces the original row-by-row 1:length() loop),
#using the same scheme as the main-analysis table:
#  - covariate rows are left untouched
#  - "StateMassachusetts" -> "Massachusetts"
#  - "(Intercept)"        -> "Intercept/Alabama" (Alabama is the baseline state)
#  - "s(Time_Period_ID):as.factor(Region)West.1" -> "Smoothed Time for Region West.1"
sens_labels <- rownames(sensitivity_anlys_conf_by_indication_full_table)
sens_is_covariate <- sens_labels %in% covariates

#state fixed effects: drop the leading "State" tag
sens_state_rows <- !sens_is_covariate & startsWith(sens_labels, "State")
sens_labels[sens_state_rows] <- substring(sens_labels[sens_state_rows], 6)

#the intercept absorbs the baseline state (Alabama)
sens_labels[!sens_is_covariate & sens_labels == "(Intercept)"] <- "Intercept/Alabama"

#region-specific smoothed time effects: strip the smooth-term prefix
sens_smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
sens_smooth_rows <- !sens_is_covariate & startsWith(sens_labels, sens_smooth_prefix)
sens_labels[sens_smooth_rows] <- paste0("Smoothed Time for Region ",
                                        substring(sens_labels[sens_smooth_rows],
                                                  nchar(sens_smooth_prefix) + 1))

rownames(sensitivity_anlys_conf_by_indication_full_table) <- sens_labels

#store the 95% CI
#hoist the model summary once: $se is one standard error per coefficient,
#aligned with coef() order (smooth-term coefficients included)
sens_summary <- summary(sensitivity_anlys_conf_by_indication)

#Wald 95% CI on the log-odds scale
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound <-
  sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate - 1.96*sens_summary$se
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound <-
  sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate + 1.96*sens_summary$se

#point estimates and CI endpoints on the odds-ratio scale
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio <-
  exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_LB <-
  exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_UB <-
  exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound)

#standard errors and p-values; the smoothed time effect rows get NA p-values
#because the summary only reports p-values for the parametric terms
sensitivity_anlys_conf_by_indication_full_table$Standard_Error <- sens_summary$se
sensitivity_anlys_conf_by_indication_full_table$p_value <-
  c(sens_summary$p.pv,
    rep(NA, length(coef(sensitivity_anlys_conf_by_indication)) - length(sens_summary$p.pv)))

#inspect the top of the table (intercept and state fixed effects come first)
head(sensitivity_anlys_conf_by_indication_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.94086992            -10.07373090
## Alaska                      0.59454494              0.50698675
## Arizona                     0.60071300              0.54629247
## Arkansas                   -0.32446412             -0.39653324
## California                  0.05809028             -0.03440845
## Colorado                    0.39465654              0.32438942
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.8080089 4.816539e-05  4.217298e-05
## Alaska                          0.6821031 1.812206e+00  1.660281e+00
## Arizona                         0.6551335 1.823418e+00  1.726839e+00
## Arkansas                       -0.2523950 7.229146e-01  6.726479e-01
## California                      0.1505890 1.059811e+00  9.661768e-01
## Colorado                        0.4649237 1.483874e+00  1.383186e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.500926e-05     0.06778622  0.000000e+00
## Alaska             1.978033e+00     0.04467255  2.053321e-40
## Arizona            1.925400e+00     0.02776558 8.384238e-104
## Arkansas           7.769378e-01     0.03676996  1.102813e-18
## California         1.162519e+00     0.04719322  2.183592e-01
## Colorado           1.591893e+00     0.03585057  3.482289e-28
#...and the bottom (region-specific smoothed time effects; p-values are NA there)
tail(sensitivity_anlys_conf_by_indication_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.5785156               0.4709646
## Smoothed Time for Region West.5            0.7119943               0.5497326
## Smoothed Time for Region West.6            0.8934090               0.6902182
## Smoothed Time for Region West.7            1.1346202               0.8905588
## Smoothed Time for Region West.8            1.4360230               1.1498232
## Smoothed Time for Region West.9            1.6566192               1.3350400
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.6860667   1.783389
## Smoothed Time for Region West.5               0.8742559   2.038052
## Smoothed Time for Region West.6               1.0965998   2.443445
## Smoothed Time for Region West.7               1.3786816   3.109992
## Smoothed Time for Region West.8               1.7222227   4.203943
## Smoothed Time for Region West.9               1.9781985   5.241560
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.601538      1.985889     0.05487299
## Smoothed Time for Region West.5      1.732790      2.397091     0.08278653
## Smoothed Time for Region West.6      1.994151      2.993969     0.10366877
## Smoothed Time for Region West.7      2.436491      3.969665     0.12452113
## Smoothed Time for Region West.8      3.157635      5.596955     0.14602028
## Smoothed Time for Region West.9      3.800148      7.229707     0.16407104
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export the sensitivity analysis confounding by indication full table of estimates as CSV
#(left commented out so a re-knit does not overwrite it)
# write.csv(round(sensitivity_anlys_conf_by_indication_full_table,3), "./Data/coefficients_JustBeforeInd_9_6_21_full_data.csv")

#export out a table with just the covariates:
#locate the covariate rows in the full coefficient table
covariate_Index<-which(rownames(sensitivity_anlys_conf_by_indication_full_table) %in% covariates)
sensitivity_anlys_conf_by_indication_covariate_table<-round(sensitivity_anlys_conf_by_indication_full_table[covariate_Index,], 5)

#human-readable labels for the covariate rows
rownames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                                  "Naloxone_Pharmacy_No",
                                                                  "Medical_Marijuana",
                                                                  "Recreational_Marijuana",
                                                                  "GSL", 
                                                                  "PDMP",
                                                                  "Medicaid_Expansion",
                                                                  "Just Before Indicator", 
                                                                  "Number of States w DIH Prosecution")

#reorganize: covariates on top, the remaining (unrounded) coefficients beneath
sensitivity_anlys_conf_by_indication_covariate_table<-rbind(sensitivity_anlys_conf_by_indication_covariate_table,
                                                            sensitivity_anlys_conf_by_indication_full_table[-covariate_Index,])

#keep only the odds-ratio-scale columns and the p-value
log_scale_cols<-c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                  "Coefficient_Upper_Bound", "Standard_Error")
sensitivity_anlys_conf_by_indication_covariate_table<-
  sensitivity_anlys_conf_by_indication_covariate_table[, !(colnames(sensitivity_anlys_conf_by_indication_covariate_table) %in% log_scale_cols)]

#relabel the retained columns (odds ratios reported here as risk ratios --
#presumably via the rare-outcome approximation; confirm with the authors)
colnames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
#show the covariate rows (plus the intercept row) of the exported table
head(sensitivity_anlys_conf_by_indication_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.392000e-01 8.011100e-01
## Naloxone_Pharmacy_No                       6.885300e-01 6.508800e-01
## Medical_Marijuana                          1.049860e+00 1.002820e+00
## Recreational_Marijuana                     8.412200e-01 6.564000e-01
## GSL                                        9.761000e-01 9.104400e-01
## PDMP                                       9.222700e-01 8.955400e-01
## Medicaid_Expansion                         1.093930e+00 9.624700e-01
## Just Before Indicator                      9.499200e-01 9.271800e-01
## Number of States w DIH Prosecution         9.829900e-01 9.756200e-01
## Intercept/Alabama                          4.816539e-05 4.217298e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              1.101090e+00 0.43945
## Naloxone_Pharmacy_No               7.283700e-01 0.00000
## Medical_Marijuana                  1.099100e+00 0.03749
## Recreational_Marijuana             1.078090e+00 0.17195
## GSL                                1.046500e+00 0.49600
## PDMP                               9.498000e-01 0.00000
## Medicaid_Expansion                 1.243350e+00 0.16932
## Just Before Indicator              9.732000e-01 0.00003
## Number of States w DIH Prosecution 9.904200e-01 0.00001
## Intercept/Alabama                  5.500926e-05 0.00000
#export the covariate table into a CSV file (left commented out so a re-knit does not overwrite it)
# write.csv(round(sensitivity_anlys_conf_by_indication_covariate_table,3), "./Data/covariates_just_before_intervention_9_6_21_full_data.csv")

6 Sensitivity Analysis 2: Exclude States with More Than 75% Missing Monthly Data

6.1 Analysis

############################## Exclude States from Analysis Based on Missing Data ###############################
#read the interpolated monthly unintentional-overdose counts (ages 17+)
#and subset the data to be between 2000 and 2019
overdose_monthly_imputed<-read.csv("./Data/od_data_interpolated_unintentional_1999_2019_age_17_up_9_6_21.csv")
od_2000_to_2019<-overdose_monthly_imputed[overdose_monthly_imputed$Year>=2000 & overdose_monthly_imputed$Year<=2019,]

#convert the date to Date objects and the number of deaths to numeric
#NOTE(review): assumes Month.Code is formatted "YYYY/MM" in the CSV — confirm;
#as.yearmon -> as.Date yields the first day of each month
od_2000_to_2019$Date<-as.Date(as.yearmon(od_2000_to_2019$Month.Code, format = "%Y/%m"))
#coerce Deaths to numeric; any non-numeric (e.g. suppressed) entries become NA
od_2000_to_2019$Deaths<-as.numeric(od_2000_to_2019$Deaths)

#plot to look at the trend of the outcome and see how much data is missing
ggplot(od_2000_to_2019, aes(x = Date, y = Deaths)) + geom_line() + facet_wrap(vars(State))

#find the states where the proportion of missing monthly data for years
#2000 to 2019 is strictly greater than 75%
#FIX: vapply (not sapply) guarantees a logical vector regardless of input,
#so downstream logical subsetting cannot silently break on edge cases
statesToExcludeCriteria<-vapply(unique(od_2000_to_2019$State), function(state){
  mean(is.na(od_2000_to_2019$Deaths[od_2000_to_2019$State == state]))>.75},
  logical(1))

statesToExclude<-unique(od_2000_to_2019$State)[statesToExcludeCriteria]
#states excluded: [1] "Alaska"       "Montana"      "Nebraska"     "North Dakota" "South Dakota" "Vermont" "Wyoming"

#summarise the share of missing monthly death counts per state,
#then report the median (and IQR) among the states we keep
summary_missingness <- od_2000_to_2019 %>%
  group_by(State) %>%
  summarise(missing = mean(is.na(Deaths)))

#missingness restricted to the retained (non-excluded) states
retained_missingness <- summary_missingness$missing[!(summary_missingness$State %in% statesToExclude)]

median(retained_missingness)
## [1] 0.02083333
IQR(retained_missingness)
## [1] 0.1979167
summary(retained_missingness)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.02083 0.12791 0.19792 0.64583
#keep only the rows for states that survive the missing-data screen
keep_rows <- !(main_analysis_data$State %in% statesToExclude)
sensitivity_anlys_exclude_states_data <- main_analysis_data[keep_rows, ]


#fit the binomial GAM on the reduced state set: the outcome is the
#(deaths, survivors) count pair per state-period; the model has State fixed
#effects, region-specific cubic regression splines ("cr") over time, the
#policy covariates, the intervention indicator, and the count of states
#with the intervention
sensitivity_anlys_exclude_states_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                              s(Time_Period_ID, bs = "cr", by = as.factor(Region))  +
                                              Naloxone_Pharmacy_Yes_Redefined +
                                              Naloxone_Pharmacy_No_Redefined +
                                              Medical_Marijuana_Redefined +
                                              Recreational_Marijuana_Redefined +
                                              GSL_Redefined +
                                              PDMP_Redefined +
                                              Medicaid_Expansion_Redefined +
                                              Intervention_Redefined + 
                                              num_states_w_intervention,
                                            data = sensitivity_anlys_exclude_states_data, family = "binomial")
#render the regression table as HTML for the report
stargazer(sensitivity_anlys_exclude_states_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateArizona 0.317***
(0.014)
StateArkansas -0.388***
(0.020)
StateCalifornia -0.161***
(0.013)
StateColorado 0.099***
(0.016)
StateConnecticut 0.185***
(0.016)
StateDelaware 0.433***
(0.022)
StateFlorida 0.252***
(0.012)
StateGeorgia -0.071***
(0.013)
StateHawaii -0.222***
(0.026)
StateIdaho -0.135***
(0.024)
StateIllinois -0.017
(0.013)
StateIndiana 0.087***
(0.014)
StateIowa -0.741***
(0.021)
StateKansas -0.328***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.294***
(0.014)
StateMaine 0.146***
(0.022)
StateMaryland -1.066***
(0.019)
StateMassachusetts 0.207***
(0.014)
StateMichigan -0.020
(0.014)
StateMinnesota -0.617***
(0.017)
StateMississippi -0.098***
(0.018)
StateMissouri 0.196***
(0.015)
StateNevada 0.440***
(0.017)
StateNew Hampshire 0.258***
(0.020)
StateNew Jersey 0.106***
(0.013)
StateNew Mexico 0.628***
(0.017)
StateNew York -0.242***
(0.013)
StateNorth Carolina 0.176***
(0.013)
StateOhio 0.453***
(0.012)
StateOklahoma 0.385***
(0.015)
StateOregon -0.191***
(0.018)
StatePennsylvania 0.434***
(0.012)
StateRhode Island 0.238***
(0.022)
StateSouth Carolina 0.223***
(0.015)
StateTennessee 0.436***
(0.013)
StateTexas -0.205***
(0.012)
StateUtah 0.071***
(0.018)
StateVirginia -0.114***
(0.014)
StateWashington 0.079***
(0.015)
StateWest Virginia 0.877***
(0.015)
StateWisconsin -0.038***
(0.015)
Naloxone_Pharmacy_Yes_Redefined -0.021***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.016**
(0.007)
Medical_Marijuana_Redefined 0.061***
(0.006)
Recreational_Marijuana_Redefined -0.040***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.016***
(0.006)
Medicaid_Expansion_Redefined 0.099***
(0.006)
Intervention_Redefined 0.066***
(0.005)
num_states_w_intervention 0.004**
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.919***
(0.054)
Observations 1,720
Adjusted R2 0.910
Log Likelihood -15,830.150
UBRE 10.177
Note: p<0.1; p<0.05; p<0.01

6.2 Compile Results

############################## Make Data Frame of Results and 95% CI ###############################
#store the model coefficients into a one-column table (rownames = coefficient names)
sensitivity_anlys_exclude_states_full_table<-data.frame(coef(sensitivity_anlys_exclude_states_model))
#check to see how the table looks (column name is auto-generated at this point)
head(sensitivity_anlys_exclude_states_full_table)
##                  coef.sensitivity_anlys_exclude_states_model.
## (Intercept)                                       -9.91864326
## StateArizona                                       0.31672571
## StateArkansas                                     -0.38808496
## StateCalifornia                                   -0.16089236
## StateColorado                                      0.09931829
## StateConnecticut                                   0.18512065
#rename the column to "Coefficient_Estimate"
colnames(sensitivity_anlys_exclude_states_full_table)<-c("Coefficient_Estimate")

#vector of the policy covariate terms; rows with these names keep their
#names unchanged in the renaming loop below
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined",
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#there are 3 types of coefficients: state effects, the covariates, and
#smoothed time effects; covariate rows keep their names as-is
#FIX: iterate with seq_along over a hoisted copy of the rownames instead of
#1:length(rownames(...)), use startsWith() for the prefix checks, and write
#the labels back with a single rownames<- assignment
row_labels<-rownames(sensitivity_anlys_exclude_states_full_table)

for(i in seq_along(row_labels)){

  #covariate rows are left untouched
  if(!(row_labels[i] %in% covariates)){

    if(startsWith(row_labels[i], "State")){

      #state effects look like "StateMassachusetts" or "StateGeorgia";
      #strip the "State" prefix and keep just the state name
      row_labels[i]<-substr(row_labels[i], start = 6, stop = nchar(row_labels[i]))

    }else if(row_labels[i] == "(Intercept)"){

      #the intercept absorbs the baseline state (Alabama)
      row_labels[i]<-"Intercept/Alabama"

    }else if(startsWith(row_labels[i], "s(Time_Period_ID):as.factor(Region)")){

      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.3";
      #replace the technical prefix with a readable label
      row_labels[i]<-paste("Smoothed Time for Region ",
                           substr(row_labels[i], start = 36, stop = nchar(row_labels[i])),
                           sep = "")

    }
  }
}

#write the cleaned labels back in one assignment
rownames(sensitivity_anlys_exclude_states_full_table)<-row_labels

#Wald 95% confidence intervals for the coefficients (estimate +/- 1.96*SE);
#summary()$se is ordered the same way as coef(), so the vectors align row-wise
sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate -
  1.96*summary(sensitivity_anlys_exclude_states_model)$se
sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate +
  1.96*summary(sensitivity_anlys_exclude_states_model)$se

#compute the estimates and confidence intervals on the odds ratio scale
sensitivity_anlys_exclude_states_full_table$Odds_Ratio<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_exclude_states_full_table$Standard_Error<-summary(sensitivity_anlys_exclude_states_model)$se
#summary()$p.pv covers only the parametric terms, so the smoothed time effect
#rows are padded with NA
#NOTE(review): this padding relies on the parametric coefficients preceding
#the smooth coefficients in coef() — true for mgcv's ordering, but confirm
#if the mgcv version changes
sensitivity_anlys_exclude_states_full_table$p_value<-c(summary(sensitivity_anlys_exclude_states_model)$p.pv,
                                                       rep(NA, length(coef(sensitivity_anlys_exclude_states_model)) -
                                                             length(summary(sensitivity_anlys_exclude_states_model)$p.pv)))

head(sensitivity_anlys_exclude_states_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.91864326            -10.02510428
## Arizona                     0.31672571              0.28937143
## Arkansas                   -0.38808496             -0.42662480
## California                 -0.16089236             -0.18695392
## Colorado                    0.09931829              0.06782206
## Connecticut                 0.18512065              0.15458896
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.8121822 4.924793e-05  4.427438e-05
## Arizona                         0.3440800 1.372626e+00  1.335588e+00
## Arkansas                       -0.3495451 6.783547e-01  6.527084e-01
## California                     -0.1348308 8.513837e-01  8.294820e-01
## Colorado                        0.1308145 1.104418e+00  1.070175e+00
## Connecticut                     0.2156523 1.203364e+00  1.167178e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.478017e-05     0.05431685  0.000000e+00
## Arizona            1.410691e+00     0.01395626 5.115891e-114
## Arkansas           7.050087e-01     0.01966319  1.045273e-86
## California         8.738638e-01     0.01329671  1.054074e-33
## Colorado           1.139756e+00     0.01606950  6.388067e-10
## Connecticut        1.240671e+00     0.01557739  1.434599e-32
tail(sensitivity_anlys_exclude_states_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2242012              0.19471674
## Smoothed Time for Region West.5            0.1737596              0.11973538
## Smoothed Time for Region West.6            0.1469437              0.07186594
## Smoothed Time for Region West.7            0.1925901              0.10452667
## Smoothed Time for Region West.8            0.3045692              0.19727128
## Smoothed Time for Region West.9            0.4229533              0.33082820
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2536857   1.251323
## Smoothed Time for Region West.5               0.2277839   1.189770
## Smoothed Time for Region West.6               0.2220214   1.158289
## Smoothed Time for Region West.7               0.2806536   1.212386
## Smoothed Time for Region West.8               0.4118671   1.356041
## Smoothed Time for Region West.9               0.5150784   1.526463
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.214967      1.288767     0.01504311
## Smoothed Time for Region West.5      1.127199      1.255814     0.02756339
## Smoothed Time for Region West.6      1.074511      1.248598     0.03830496
## Smoothed Time for Region West.7      1.110185      1.323995     0.04493034
## Smoothed Time for Region West.8      1.218074      1.509634     0.05474383
## Smoothed Time for Region West.9      1.392121      1.673770     0.04700260
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_exclude_states_full_table) %in% covariates)
sensitivity_anlys_exclude_states_covariate_table<-(round(sensitivity_anlys_exclude_states_full_table[covariate_Index,], 5))

#rename the variables so the table reads cleanly
#NOTE(review): this relies on covariate_Index returning the covariate rows in
#the same order as the labels below (i.e. the model-specification order in
#which coef() lists the parametric terms) — confirm
rownames(sensitivity_anlys_exclude_states_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                              "Naloxone_Pharmacy_No",
                                                              "Medical_Marijuana",
                                                              "Recreational_Marijuana",
                                                              "GSL", 
                                                              "PDMP", 
                                                              "Medicaid_Expansion",
                                                              "Intervention_Redefined",
                                                              "Number of States w DIH Prosecution")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
sensitivity_anlys_exclude_states_covariate_table<-rbind(sensitivity_anlys_exclude_states_covariate_table, sensitivity_anlys_exclude_states_full_table[-covariate_Index,])

#keep only the odds-ratio-scale columns
#FIX: use logical negation instead of -which(): if none of the names matched,
#-which() would evaluate to an empty index and silently drop EVERY column
keep_cols<-!(colnames(sensitivity_anlys_exclude_states_covariate_table) %in% c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))
sensitivity_anlys_exclude_states_covariate_table<-sensitivity_anlys_exclude_states_covariate_table[, keep_cols]

colnames(sensitivity_anlys_exclude_states_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_exclude_states_covariate_table, 10)
head(sensitivity_anlys_exclude_states_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.791200e-01 9.642100e-01
## Naloxone_Pharmacy_No                       1.016630e+00 1.003620e+00
## Medical_Marijuana                          1.063350e+00 1.051550e+00
## Recreational_Marijuana                     9.608100e-01 9.444600e-01
## GSL                                        1.034130e+00 1.022700e+00
## PDMP                                       9.839900e-01 9.727000e-01
## Medicaid_Expansion                         1.103900e+00 1.090720e+00
## Intervention_Redefined                     1.068040e+00 1.056700e+00
## Number of States w DIH Prosecution         1.003780e+00 1.000140e+00
## Intercept/Alabama                          4.924793e-05 4.427438e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              9.942700e-01 0.00708
## Naloxone_Pharmacy_No               1.029800e+00 0.01205
## Medical_Marijuana                  1.075280e+00 0.00000
## Recreational_Marijuana             9.774400e-01 0.00000
## GSL                                1.045690e+00 0.00000
## PDMP                               9.954000e-01 0.00609
## Medicaid_Expansion                 1.117240e+00 0.00000
## Intervention_Redefined             1.079490e+00 0.00000
## Number of States w DIH Prosecution 1.007430e+00 0.04194
## Intercept/Alabama                  5.478017e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_exclude_states_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_exclude_states.csv")

6.3 Attributable Deaths

###################################### Attributable Deaths #############################
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_exclude_states<-sensitivity_anlys_exclude_states_data[which(sensitivity_anlys_exclude_states_data$Intervention_Redefined>0),]

#counterfactual probability of overdose had the intervention not occurred:
#subtract the intervention effect on the logit scale, then back-transform
prob_od_no_int_exclude_states<-expit(-coef(sensitivity_anlys_exclude_states_model)["Intervention_Redefined"]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                     + logit(attr_deaths_anlys_exclude_states$imputed_deaths/attr_deaths_anlys_exclude_states$population))

#Wald 95% bounds for all coefficients, using the model's standard errors
coef_lb<-coef(sensitivity_anlys_exclude_states_model) - 1.96*summary(sensitivity_anlys_exclude_states_model)$se
coef_ub<-coef(sensitivity_anlys_exclude_states_model) + 1.96*summary(sensitivity_anlys_exclude_states_model)$se

#lower and upper bounds of the counterfactual probability, using the
#bounds of the intervention coefficient
prob_od_no_int_LB_exclude_states<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                        + logit(attr_deaths_anlys_exclude_states$imputed_deaths/attr_deaths_anlys_exclude_states$population))

prob_od_no_int_UB_exclude_states<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_exclude_states$Intervention_Redefined
                                        + logit(attr_deaths_anlys_exclude_states$imputed_deaths/attr_deaths_anlys_exclude_states$population))

#estimate the number of deaths attributable to the intervention per time period
#FIX: size the result vectors by the time periods present in the intervention
#subset; the original sized them by the FULL data's time periods, which makes
#the data.frame() construction below fail (length mismatch) whenever the two
#counts differ
time_periods_exclude_states<-sort(unique(attr_deaths_anlys_exclude_states$Time_Period_ID))
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(time_periods_exclude_states))

#for each time period: attributable deaths = observed deaths - expected
#deaths under the no-intervention counterfactual
for(index in seq_along(time_periods_exclude_states)){
  #rows belonging to this time period
  time_point_index<-which(attr_deaths_anlys_exclude_states$Time_Period_ID == time_periods_exclude_states[index])
  observed_deaths<-attr_deaths_anlys_exclude_states$imputed_deaths[time_point_index]
  population_at_risk<-attr_deaths_anlys_exclude_states$population[time_point_index]

  num_attr_od[index]<-sum(observed_deaths
                          - prob_od_no_int_exclude_states[time_point_index]*population_at_risk)

  #lower and upper bounds of the estimated attributable deaths
  num_attr_od_LB[index]<-sum(observed_deaths
                             - prob_od_no_int_LB_exclude_states[time_point_index]*population_at_risk)
  num_attr_od_UB[index]<-sum(observed_deaths
                             - prob_od_no_int_UB_exclude_states[time_point_index]*population_at_risk)
}

#NOTE(review): pairing sorted Time_Period_Start with sorted Time_Period_ID
#assumes the IDs order the same way as the start dates — confirm upstream
num_attr_od_exclude_states<-data.frame("Time_Period_ID" = time_periods_exclude_states,
                                       "Time_Start" = sort(unique(attr_deaths_anlys_exclude_states$Time_Period_Start)),
                                       "Num_Attr_Deaths" = num_attr_od,
                                       "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                       "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_exclude_states$Num_Attr_Deaths)
## [1] 34907.92
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_exclude_states<-num_attr_od_exclude_states %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_exclude_states$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   31.69  797.13 1488.48 1745.40 2428.27 3873.37

7 Sensitivity Analysis 2b: Excluding Different States

7.1 Analysis

############################## Exclude Different States from Analysis ###############################
# main_analysis_data %>% 
#   group_by(State) %>% 
#   summarise(sum_int = sum(Intervention_Redefined), sum_deaths = sum(imputed_deaths)) %>% 
#   arrange(desc(sum_deaths))

# list_of_states <- c("Alabama",
#                       "Alaska",
#                       "Arizona",
#                       "Arkansas",
#                       "California",
#                       "Colorado",
#                       "Connecticut",
#                       "Delaware",
#                       "Florida",
#                       "Georgia",
#                       "Hawaii",
#                       "Idaho",
#                       "Illinois",
#                       "Indiana",
#                       "Iowa",
#                       "Kansas",
#                       "Kentucky",
#                       "Louisiana",
#                       "Maine",
#                       "Maryland",
#                       "Massachusetts",
#                       "Michigan",
#                       "Minnesota",
#                       "Mississippi",
#                       "Missouri",
#                       "Montana",
#                       "Nebraska",
#                       "Nevada",
#                       "New Hampshire",
#                       "New Jersey",
#                       "New Mexico",
#                       "New York",
#                       "North Carolina",
#                       "North Dakota",
#                       "Ohio",
#                       "Oklahoma",
#                       "Oregon",
#                       "Pennsylvania",
#                       "Rhode Island",
#                       "South Carolina",
#                       "South Dakota",
#                       "Tennessee",
#                       "Texas",
#                       "Utah",
#                       "Vermont",
#                       "Virginia",
#                       "Washington",
#                       "West Virginia",
#                       "Wisconsin",
#                       "Wyoming" )

#use R's built-in list of state names; "" is the baseline run excluding no state
list_of_states <- c("", state.name)
#preallocate the results table; derive its size and state column from
#list_of_states instead of hard-coding 51 and duplicating c("", state.name)
sensitivity_anlys_exclude_diff_states_tab_results <- data.frame(state = list_of_states,
                                                                coefficient_intervention = NA_real_,
                                                                std_error = NA_real_,
                                                                p_val = NA_real_)
#leave-one-state-out sensitivity analysis: refit the model excluding each
#state in turn ("" excludes no state, giving the all-states baseline)
for(state in list_of_states){
  #exclude the current state (no rows are dropped when state == "")
  
  sensitivity_anlys_exclude_diff_states_data <- main_analysis_data %>%
    ungroup() %>%
    filter(State != state)
  
  #same binomial GAM specification as the main analysis
  sensitivity_anlys_exclude_diff_states_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                                     s(Time_Period_ID, bs = "cr", by = as.factor(Region))  +
                                                     Naloxone_Pharmacy_Yes_Redefined +
                                                     Naloxone_Pharmacy_No_Redefined +
                                                     Medical_Marijuana_Redefined +
                                                     Recreational_Marijuana_Redefined +
                                                     GSL_Redefined +
                                                     PDMP_Redefined +
                                                     Medicaid_Expansion_Redefined +
                                                     Intervention_Redefined + 
                                                     num_states_w_intervention,
                                                   data = sensitivity_anlys_exclude_diff_states_data, family = "binomial")
  #obtain the summary of model
  summary_sensitivity_analys_exclude_diff_states <- summary(sensitivity_anlys_exclude_diff_states_model)
  #obtain coefficient of intervention
  sensitivity_anlys_exclude_diff_states_tab_results$coefficient_intervention[sensitivity_anlys_exclude_diff_states_tab_results$state == state] <- summary_sensitivity_analys_exclude_diff_states$p.coeff["Intervention_Redefined"]
  #obtain standard error of intervention
  sensitivity_anlys_exclude_diff_states_tab_results$std_error[sensitivity_anlys_exclude_diff_states_tab_results$state == state] <- summary_sensitivity_analys_exclude_diff_states$se["Intervention_Redefined"]
  #obtain p-value of intervention
  sensitivity_anlys_exclude_diff_states_tab_results$p_val[sensitivity_anlys_exclude_diff_states_tab_results$state == state] <- summary_sensitivity_analys_exclude_diff_states$p.pv["Intervention_Redefined"]
}

#label the no-exclusion baseline row
sensitivity_anlys_exclude_diff_states_tab_results$state[sensitivity_anlys_exclude_diff_states_tab_results$state == ""] <- "all"
#NOTE(review): this prints only the model from the LAST loop iteration
#(the fit excluding the final state in list_of_states), not the baseline
#all-states fit — confirm this is the intended table
stargazer(sensitivity_anlys_exclude_diff_states_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.276***
(0.028)
StateArizona 0.314***
(0.014)
StateArkansas -0.391***
(0.020)
StateCalifornia -0.160***
(0.013)
StateColorado 0.096***
(0.016)
StateConnecticut 0.187***
(0.016)
StateDelaware 0.428***
(0.022)
StateFlorida 0.253***
(0.012)
StateGeorgia -0.071***
(0.013)
StateHawaii -0.227***
(0.026)
StateIdaho -0.136***
(0.024)
StateIllinois -0.017
(0.013)
StateIndiana 0.087***
(0.014)
StateIowa -0.740***
(0.021)
StateKansas -0.327***
(0.019)
StateKentucky 0.640***
(0.014)
StateLouisiana 0.294***
(0.014)
StateMaine 0.145***
(0.022)
StateMaryland -1.067***
(0.019)
StateMassachusetts 0.207***
(0.014)
StateMichigan -0.020
(0.014)
StateMinnesota -0.619***
(0.017)
StateMississippi -0.100***
(0.018)
StateMissouri 0.195***
(0.015)
StateMontana -0.359***
(0.029)
StateNebraska -0.883***
(0.029)
StateNevada 0.439***
(0.017)
StateNew Hampshire 0.257***
(0.020)
StateNew Jersey 0.106***
(0.013)
StateNew Mexico 0.629***
(0.017)
StateNew York -0.240***
(0.013)
StateNorth Carolina 0.177***
(0.013)
StateNorth Dakota -1.054***
(0.045)
StateOhio 0.454***
(0.012)
StateOklahoma 0.385***
(0.015)
StateOregon -0.195***
(0.018)
StatePennsylvania 0.435***
(0.012)
StateRhode Island 0.237***
(0.022)
StateSouth Carolina 0.221***
(0.015)
StateSouth Dakota -0.960***
(0.043)
StateTennessee 0.436***
(0.013)
StateTexas -0.204***
(0.012)
StateUtah 0.073***
(0.018)
StateVermont -0.168***
(0.031)
StateVirginia -0.113***
(0.014)
StateWashington 0.077***
(0.015)
StateWest Virginia 0.875***
(0.015)
StateWisconsin -0.037**
(0.015)
Naloxone_Pharmacy_Yes_Redefined -0.022***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.010
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.037***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.019***
(0.006)
Medicaid_Expansion_Redefined 0.101***
(0.006)
Intervention_Redefined 0.061***
(0.005)
num_states_w_intervention 0.004**
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.912***
(0.054)
Observations 1,960
Adjusted R2 0.911
Log Likelihood -16,603.040
UBRE 8.990
Note: p<0.1; p<0.05; p<0.01

7.2 Compile Results

############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#(data.frame() on the named coefficient vector yields a one-column data frame whose
# row names are the coefficient names; those row names are cleaned up below)
sensitivity_anlys_exclude_diff_states_full_table<-data.frame(coef(sensitivity_anlys_exclude_diff_states_model))
#check to see how the table looks
head(sensitivity_anlys_exclude_diff_states_full_table)
##                 coef.sensitivity_anlys_exclude_diff_states_model.
## (Intercept)                                           -9.91163324
## StateAlaska                                            0.27649991
## StateArizona                                           0.31444075
## StateArkansas                                         -0.39099480
## StateCalifornia                                       -0.16008628
## StateColorado                                          0.09582859
#rename the column to "Coefficient_Estimate"
colnames(sensitivity_anlys_exclude_diff_states_full_table)<-c("Coefficient_Estimate")

#vector of covariates
#(the policy covariates from the gam model formula; used below both to skip these
# rows when prettifying coefficient names and to pull them into their own table)
covariates<-c("Naloxone_Pharmacy_Yes_Redefined", 
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined", 
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined", 
              "Intervention_Redefined",
              "num_states_w_intervention")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#(idiom fixes: seq_along() instead of 1:length(), startsWith() instead of substr()
# equality checks, paste0() instead of paste(sep = ""), and the repeated rownames()
# calls hoisted into one vector that is written back once at the end)
row_labels <- rownames(sensitivity_anlys_exclude_diff_states_full_table)
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"   #35 characters long

for (i in seq_along(row_labels)) {
  label <- row_labels[i]

  #covariate rows keep their original names
  if (label %in% covariates) next

  if (startsWith(label, "State")) {
    #state fixed effects look like "StateMassachusetts" or "StateGeorgia";
    #strip the leading "State" so only the state name remains
    row_labels[i] <- substr(label, 6, nchar(label))
  } else if (label == "(Intercept)") {
    #the intercept absorbs the baseline state (Alabama), so make that explicit
    row_labels[i] <- "Intercept/Alabama"
  } else if (startsWith(label, smooth_prefix)) {
    #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
    #replace the machine-generated 35-character prefix with a readable one
    row_labels[i] <- paste0("Smoothed Time for Region ", substr(label, 36, nchar(label)))
  }
}

rownames(sensitivity_anlys_exclude_diff_states_full_table) <- row_labels

#Wald 95% confidence intervals and odds-ratio versions of every coefficient;
#the summary() object is computed once up front and reused instead of being
#rebuilt on every line
model_summary <- summary(sensitivity_anlys_exclude_diff_states_model)
ci_half_width <- 1.96*model_summary$se

sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Lower_Bound<-
  sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Estimate - ci_half_width
sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Upper_Bound<-
  sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Estimate + ci_half_width

#exponentiate the point estimate and both bounds onto the odds-ratio scale
for(col_pair in list(c("Odds_Ratio", "Coefficient_Estimate"),
                     c("Odds_Ratio_LB", "Coefficient_Lower_Bound"),
                     c("Odds_Ratio_UB", "Coefficient_Upper_Bound"))){
  sensitivity_anlys_exclude_diff_states_full_table[[col_pair[1]]]<-
    exp(sensitivity_anlys_exclude_diff_states_full_table[[col_pair[2]]])
}

#standard errors come straight from the model summary
sensitivity_anlys_exclude_diff_states_full_table$Standard_Error<-model_summary$se
#p-values exist only for the parametric (non-smooth) terms, which come first in
#coef(); the smoothed time-effect rows are padded with NA
sensitivity_anlys_exclude_diff_states_full_table$p_value<-
  c(model_summary$p.pv,
    rep(NA, length(coef(sensitivity_anlys_exclude_diff_states_model)) - length(model_summary$p.pv)))

head(sensitivity_anlys_exclude_diff_states_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.91163324            -10.01759074
## Alaska                      0.27649991              0.22240681
## Arizona                     0.31444075              0.28711934
## Arkansas                   -0.39099480             -0.42951459
## California                 -0.16008628             -0.18609870
## Colorado                    0.09582859              0.06439146
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.8056757 4.959437e-05  4.460829e-05
## Alaska                          0.3305930 1.318507e+00  1.249079e+00
## Arizona                         0.3417622 1.369493e+00  1.332583e+00
## Arkansas                       -0.3524750 6.763837e-01  6.508249e-01
## California                     -0.1340739 8.520703e-01  8.301917e-01
## Colorado                        0.1272657 1.100570e+00  1.066510e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.513776e-05     0.05405995  0.000000e+00
## Alaska             1.391793e+00     0.02759852  1.262165e-23
## Arizona            1.407426e+00     0.01393949 1.132387e-112
## Arkansas           7.029461e-01     0.01965296  4.499620e-88
## California         8.745255e-01     0.01327164  1.670923e-33
## Colorado           1.135719e+00     0.01603935  2.306666e-09
tail(sensitivity_anlys_exclude_diff_states_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2327125              0.20342625
## Smoothed Time for Region West.5            0.1811413              0.12741555
## Smoothed Time for Region West.6            0.1557749              0.08107344
## Smoothed Time for Region West.7            0.1970045              0.10945270
## Smoothed Time for Region West.8            0.3049141              0.19813490
## Smoothed Time for Region West.9            0.4192418              0.32763324
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2619987   1.262019
## Smoothed Time for Region West.5               0.2348670   1.198585
## Smoothed Time for Region West.6               0.2304764   1.168563
## Smoothed Time for Region West.7               0.2845563   1.217750
## Smoothed Time for Region West.8               0.4116933   1.356508
## Smoothed Time for Region West.9               0.5108504   1.520808
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.225595      1.299525     0.01494196
## Smoothed Time for Region West.5      1.135889      1.264741     0.02741110
## Smoothed Time for Region West.6      1.084451      1.259200     0.03811301
## Smoothed Time for Region West.7      1.115667      1.329172     0.04466929
## Smoothed Time for Region West.8      1.219127      1.509371     0.05447918
## Smoothed Time for Region West.9      1.387680      1.666708     0.04673908
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contains the covariates
#(positions within the full results table; which() preserves the table's row order,
# which follows the order the covariates enter the model formula)
covariate_Index<-which(rownames(sensitivity_anlys_exclude_diff_states_full_table) %in% covariates)
#round to 5 decimal places for the export-friendly covariate table
sensitivity_anlys_exclude_diff_states_covariate_table<-(round(sensitivity_anlys_exclude_diff_states_full_table[covariate_Index,], 5))

#rename the variables so that it looks cleaner
#NOTE(review): names are assigned by POSITION, so this assumes the covariate rows
#were selected in exactly the order of the `covariates` vector -- confirm before
#reusing with a different model formula
rownames(sensitivity_anlys_exclude_diff_states_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                              "Naloxone_Pharmacy_No",
                                                              "Medical_Marijuana",
                                                              "Recreational_Marijuana",
                                                              "GSL", 
                                                              "PDMP", 
                                                              "Medicaid_Expansion",
                                                              "Intervention_Redefined",
                                                              "Number of States w DIH Prosecution")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
#(the appended rows come from the full table and are therefore NOT rounded to 5 decimals)
sensitivity_anlys_exclude_diff_states_covariate_table<-rbind(sensitivity_anlys_exclude_diff_states_covariate_table, sensitivity_anlys_exclude_diff_states_full_table[-covariate_Index,])
#remove the columns that aren't in odds ratio scale
sensitivity_anlys_exclude_diff_states_covariate_table<-sensitivity_anlys_exclude_diff_states_covariate_table[,-which(colnames(sensitivity_anlys_exclude_diff_states_covariate_table) %in% c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]

#rename the remaining columns for export
#NOTE(review): these are exponentiated logistic coefficients (odds ratios); they are
#labeled "Risk_Ratio" presumably because OR ~ RR for a rare outcome -- confirm intent
colnames(sensitivity_anlys_exclude_diff_states_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_exclude_diff_states_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.781900e-01 9.634000e-01
## Naloxone_Pharmacy_No                       1.010420e+00 9.976000e-01
## Medical_Marijuana                          1.065140e+00 1.053410e+00
## Recreational_Marijuana                     9.635700e-01 9.473700e-01
## GSL                                        1.034920e+00 1.023560e+00
## PDMP                                       9.808700e-01 9.697600e-01
## Medicaid_Expansion                         1.105790e+00 1.092700e+00
## Intervention_Redefined                     1.062540e+00 1.051400e+00
## Number of States w DIH Prosecution         1.003730e+00 1.000110e+00
## Intercept/Alabama                          4.959437e-05 4.460829e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              9.932000e-01 0.00454
## Naloxone_Pharmacy_No               1.023410e+00 0.11160
## Medical_Marijuana                  1.077010e+00 0.00000
## Recreational_Marijuana             9.800500e-01 0.00002
## GSL                                1.046400e+00 0.00000
## PDMP                               9.921000e-01 0.00088
## Medicaid_Expansion                 1.119030e+00 0.00000
## Intervention_Redefined             1.073800e+00 0.00000
## Number of States w DIH Prosecution 1.007370e+00 0.04315
## Intercept/Alabama                  5.513776e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_exclude_diff_states_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_exclude_diff_states.csv")

7.3 Attributable Deaths

###################################### Attributable Deaths #############################
#keep only the state-period rows in which the (redefined) intervention was in effect,
#since only those rows can contribute deaths attributable to it
has_intervention <- sensitivity_anlys_exclude_diff_states_data$Intervention_Redefined>0
attr_deaths_anlys_exclude_diff_states<-sensitivity_anlys_exclude_diff_states_data[which(has_intervention),]

#observed log-odds of overdose death in each retained state-period, and the
#intervention exposure that multiplies the coefficient
observed_logit <- logit(attr_deaths_anlys_exclude_diff_states$imputed_deaths/attr_deaths_anlys_exclude_diff_states$population)
intervention_exposure <- attr_deaths_anlys_exclude_diff_states$Intervention_Redefined

#counterfactual probability of overdose had the intervention not occurred:
#subtract the intervention coefficient's contribution from the observed log-odds
prob_od_no_int_exclude_diff_states<-expit(observed_logit
                                     - coef(sensitivity_anlys_exclude_diff_states_model)["Intervention_Redefined"]*intervention_exposure)

#Wald 95% bounds for every coefficient, computed from the model standard errors;
#used below to bound the counterfactual probability
coef_lb<-coef(sensitivity_anlys_exclude_diff_states_model) - 1.96*summary(sensitivity_anlys_exclude_diff_states_model)$se
coef_ub<-coef(sensitivity_anlys_exclude_diff_states_model) + 1.96*summary(sensitivity_anlys_exclude_diff_states_model)$se

#counterfactual probabilities under the lower/upper intervention-coefficient bounds
#(the lower coefficient bound yields the larger counterfactual probability, so these
#labels line up with the lower/upper bounds on attributable deaths computed later)
prob_od_no_int_LB_exclude_diff_states<-expit(observed_logit
                                        - coef_lb[names(coef_lb) == "Intervention_Redefined"]*intervention_exposure)

prob_od_no_int_UB_exclude_diff_states<-expit(observed_logit
                                        - coef_ub[names(coef_ub) == "Intervention_Redefined"]*intervention_exposure)

#estimate the number of deaths attributable to the intervention, per time period
#(fix: preallocate using the time periods actually present in the subset the loop
#iterates over -- the original sized these vectors by the time periods of the FULL
#data set, which leaves trailing NAs and a data.frame length mismatch whenever some
#period has no state with the intervention)
time_points <- sort(unique(attr_deaths_anlys_exclude_diff_states$Time_Period_ID))
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA_real_, length(time_points))

#for each time period: attributable deaths = observed deaths
#                      - expected deaths had the intervention not occurred
#                        (counterfactual probability * population)
for(index in seq_along(time_points)){
  #find the indices of rows belonging to this time period
  time_point_index<-which(attr_deaths_anlys_exclude_diff_states$Time_Period_ID == time_points[index])

  period_obs <- attr_deaths_anlys_exclude_diff_states$imputed_deaths[time_point_index]
  period_pop <- attr_deaths_anlys_exclude_diff_states$population[time_point_index]

  #point estimate of attributable deaths for this period
  num_attr_od[index]<-sum(period_obs - prob_od_no_int_exclude_diff_states[time_point_index]*period_pop)

  #bounds implied by the 95% CI of the intervention coefficient
  num_attr_od_LB[index]<-sum(period_obs - prob_od_no_int_LB_exclude_diff_states[time_point_index]*period_pop)
  num_attr_od_UB[index]<-sum(period_obs - prob_od_no_int_UB_exclude_diff_states[time_point_index]*period_pop)
}

#assemble the per-period attributable-death estimates into one data frame
#NOTE(review): pairing sort(unique(Time_Period_ID)) with sort(unique(Time_Period_Start))
#assumes the two sorted vectors line up one-to-one (IDs increase with calendar time)
#-- confirm against the data construction
num_attr_od_exclude_diff_states<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_exclude_diff_states$Time_Period_ID)),
                                       "Time_Start" = sort(unique(attr_deaths_anlys_exclude_diff_states$Time_Period_Start)),
                                       "Num_Attr_Deaths" = num_attr_od,
                                       "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                       "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_exclude_diff_states$Num_Attr_Deaths)
## [1] 32470.68
#sum up the number of excess deaths per year (calendar year of each 6-month period start)
yearly_num_Attr_Deaths_exclude_diff_states<-num_attr_od_exclude_diff_states %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

#distribution of the yearly attributable-death estimates
summary(yearly_num_Attr_Deaths_exclude_diff_states$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.27  739.61 1381.41 1623.53 2259.55 3613.31

8 Sensitivity Analysis 3: Divide Unintentional Deaths Equally Amongst Missing Months

8.1 Analysis

### Sensitivity Analysis 3: Divide Unaccountable Deaths Equally Among Missing Months #####
#read the monthly unintentional-overdose file (tab-separated export)
od_data <- read.csv("./Data/unintentional_od_1999_2019_age_17_up.txt", sep = "\t", stringsAsFactors = FALSE)
#coerce Deaths to numeric; non-numeric cells become NA (presumably suppressed
#counts -- TODO confirm against the source file)
od_data$Deaths <- as.numeric(od_data$Deaths)
od_data<-od_data[!is.na(od_data$Year),] #delete the rows that just contains data set description info
tail(od_data)
##       Notes   State State.Code Year Year.Code      Month Month.Code Deaths
## 12595       Wyoming         56 2019      2019 Jul., 2019    2019/07     NA
## 12596       Wyoming         56 2019      2019 Aug., 2019    2019/08     NA
## 12597       Wyoming         56 2019      2019 Sep., 2019    2019/09     NA
## 12598       Wyoming         56 2019      2019 Oct., 2019    2019/10     NA
## 12599       Wyoming         56 2019      2019 Nov., 2019    2019/11     NA
## 12600       Wyoming         56 2019      2019 Dec., 2019    2019/12     NA
##           Population     Crude.Rate
## 12595 Not Applicable Not Applicable
## 12596 Not Applicable Not Applicable
## 12597 Not Applicable Not Applicable
## 12598 Not Applicable Not Applicable
## 12599 Not Applicable Not Applicable
## 12600 Not Applicable Not Applicable
#count of state-months with missing death counts
sum(is.na(od_data$Deaths))
## [1] 3148
#set up the overdose data to impute the missing data
#set up the dates (Month looks like "Jul., 2019", which mdy() parses to a Date)
od_data$Date<-mdy(od_data$Month)
length(unique(od_data$State))
## [1] 50
#DC is being counted in this, but we do not have data on prosecutions. Remove these rows
#NOTE(review): only 50 unique State values are reported above, yet DC is removed
#here -- confirm whether DC is actually present in this extract
od_data<-od_data[od_data$State!="District of Columbia",]

#interpolate the missing values
#read the YEARLY totals file; its per-state "Total" rows are used below to impute
#the few missing yearly counts before the monthly imputation
od_year <- read.csv("./Data/unintentional_od_yearly_1999_2019_age_17_up.txt", sep = "\t", stringsAsFactors = FALSE)
#coerce Deaths to numeric; non-numeric cells become NA
od_year$Deaths <- as.numeric(od_year$Deaths)
head(od_year, 50)
##    Notes   State State.Code Year Year.Code Deaths Population Crude.Rate
## 1        Alabama          1 1999      1999    115    3308855        3.5
## 2        Alabama          1 2000      2000    126    3323678        3.8
## 3        Alabama          1 2001      2001    163    3347225        4.9
## 4        Alabama          1 2002      2002    167    3363499        5.0
## 5        Alabama          1 2003      2003    150    3390408        4.4
## 6        Alabama          1 2004      2004    220    3417067        6.4
## 7        Alabama          1 2005      2005    215    3452576        6.2
## 8        Alabama          1 2006      2006    308    3502183        8.8
## 9        Alabama          1 2007      2007    416    3540544       11.7
## 10       Alabama          1 2008      2008    482    3583279       13.5
## 11       Alabama          1 2009      2009    535    3623746       14.8
## 12       Alabama          1 2010      2010    462    3647277       12.7
## 13       Alabama          1 2011      2011    484    3675597       13.2
## 14       Alabama          1 2012      2012    473    3697617       12.8
## 15       Alabama          1 2013      2013    520    3722241       14.0
## 16       Alabama          1 2014      2014    637    3741806       17.0
## 17       Alabama          1 2015      2015    653    3755483       17.4
## 18       Alabama          1 2016      2016    680    3766477       18.1
## 19       Alabama          1 2017      2017    750    3779274       19.8
## 20       Alabama          1 2018      2018    683    3798031       18.0
## 21       Alabama          1 2019      2019    695    3814879       18.2
## 22 Total Alabama          1   NA        NA   8934   75251742       11.9
## 23        Alaska          2 1999      1999     27     433357        6.2
## 24        Alaska          2 2000      2000     35     436215        8.0
## 25        Alaska          2 2001      2001     52     444943       11.7
## 26        Alaska          2 2002      2002     69     453855       15.2
## 27        Alaska          2 2003      2003     69     461571       14.9
## 28        Alaska          2 2004      2004     62     472951       13.1
## 29        Alaska          2 2005      2005     61     481642       12.7
## 30        Alaska          2 2006      2006     66     489722       13.5
## 31        Alaska          2 2007      2007     63     495956       12.7
## 32        Alaska          2 2008      2008     88     504331       17.4
## 33        Alaska          2 2009      2009     99     512544       19.3
## 34        Alaska          2 2010      2010     74     522853       14.2
## 35        Alaska          2 2011      2011     94     534277       17.6
## 36        Alaska          2 2012      2012    112     544349       20.6
## 37        Alaska          2 2013      2013     92     547000       16.8
## 38        Alaska          2 2014      2014    103     550189       18.7
## 39        Alaska          2 2015      2015    105     552166       19.0
## 40        Alaska          2 2016      2016    102     554567       18.4
## 41        Alaska          2 2017      2017    123     554867       22.2
## 42        Alaska          2 2018      2018     94     553622       17.0
## 43        Alaska          2 2019      2019    112     551562       20.3
## 44 Total  Alaska          2   NA        NA   1702   10652539       16.0
## 45       Arizona          4 1999      1999    360    3691428        9.8
## 46       Arizona          4 2000      2000    397    3763685       10.5
## 47       Arizona          4 2001      2001    418    3874462       10.8
## 48       Arizona          4 2002      2002    457    3968317       11.5
## 49       Arizona          4 2003      2003    507    4056693       12.5
## 50       Arizona          4 2004      2004    537    4167950       12.9
#see how many states have missing yearly entries and use the totals to impute the missing yearly values.
#(per-state count of NA yearly Deaths values)
sum_na <- od_year %>% group_by(State) %>% summarise(sum(is.na(Deaths)))
#cross-tabulate: which states have how many missing yearly entries
table(sum_na)
##                 sum(is.na(Deaths))
## State            0 4 5 72
##                  0 0 0  1
##   Alabama        1 0 0  0
##   Alaska         1 0 0  0
##   Arizona        1 0 0  0
##   Arkansas       1 0 0  0
##   California     1 0 0  0
##   Colorado       1 0 0  0
##   Connecticut    1 0 0  0
##   Delaware       1 0 0  0
##   Florida        1 0 0  0
##   Georgia        1 0 0  0
##   Hawaii         1 0 0  0
##   Idaho          1 0 0  0
##   Illinois       1 0 0  0
##   Indiana        1 0 0  0
##   Iowa           1 0 0  0
##   Kansas         1 0 0  0
##   Kentucky       1 0 0  0
##   Louisiana      1 0 0  0
##   Maine          1 0 0  0
##   Maryland       1 0 0  0
##   Massachusetts  1 0 0  0
##   Michigan       1 0 0  0
##   Minnesota      1 0 0  0
##   Mississippi    1 0 0  0
##   Missouri       1 0 0  0
##   Montana        1 0 0  0
##   Nebraska       1 0 0  0
##   Nevada         1 0 0  0
##   New Hampshire  1 0 0  0
##   New Jersey     1 0 0  0
##   New Mexico     1 0 0  0
##   New York       1 0 0  0
##   North Carolina 1 0 0  0
##   North Dakota   0 1 0  0
##   Ohio           1 0 0  0
##   Oklahoma       1 0 0  0
##   Oregon         1 0 0  0
##   Pennsylvania   1 0 0  0
##   Rhode Island   0 1 0  0
##   South Carolina 1 0 0  0
##   South Dakota   0 0 1  0
##   Tennessee      1 0 0  0
##   Texas          1 0 0  0
##   Utah           1 0 0  0
##   Vermont        1 0 0  0
##   Virginia       1 0 0  0
##   Washington     1 0 0  0
##   West Virginia  1 0 0  0
##   Wisconsin      1 0 0  0
##   Wyoming        1 0 0  0
#the 3 states with missing yearly entries are: North Dakota, South Dakota, and Rhode Island.
#spread each state's unaccounted-for deaths (its "Total" row minus the sum of its
#non-missing yearly counts) evenly across that state's missing years.
#(refactor: the original repeated the same imputation expression verbatim for each
#of the three states; it is now one helper applied in a loop)

#helper: fill the NA yearly Deaths for one state using that state's "Total" row
impute_missing_years <- function(year_df, state) {
  state_rows <- year_df$State == state
  missing_rows <- state_rows & is.na(year_df$Deaths)
  n_missing <- sum(missing_rows)
  if (n_missing > 0) {
    #the "Total" row carries the state's overall death count across all years
    total_deaths <- year_df$Deaths[state_rows & year_df$Notes == "Total"]
    accounted <- sum(year_df$Deaths[state_rows & year_df$Notes != "Total"], na.rm = TRUE)
    #divide the unaccounted deaths evenly among the missing years
    year_df$Deaths[missing_rows] <- (total_deaths - accounted)/n_missing
  }
  year_df
}

for (state_to_impute in c("North Dakota", "South Dakota", "Rhode Island")) {
  od_year <- impute_missing_years(od_year, state_to_impute)
}

#drop the "Total" rows (they have no Year) now that they have served their purpose
od_year <- od_year[!is.na(od_year$Year),]
tail(od_year)
##      Notes   State State.Code Year Year.Code Deaths Population Crude.Rate
## 1094       Wyoming         56 2014      2014     87     445830       19.5
## 1095       Wyoming         56 2015      2015     79     447212       17.7
## 1096       Wyoming         56 2016      2016     79     446600       17.7
## 1097       Wyoming         56 2017      2017     53     442832       12.0
## 1098       Wyoming         56 2018      2018     54     442962       12.2
## 1099       Wyoming         56 2019      2019     68     445025       15.3
#impute the missing monthly counts: for every state-year with missing months, divide
#the year's unaccounted-for deaths evenly across those missing months.
#NOTE(review): the (1:12) + 12*(year - startYear) indexing assumes each state's rows
#are exactly 12 consecutive months per year, ordered chronologically from startYear
#-- confirm the monthly file is sorted that way before reusing this code
od_data$imputed_vals<-rep(NA, nrow(od_data))

startYear<-1999

for(state in unique(od_data$State)){
  #get the values of the deaths for state, in row order
  currentDeaths<-od_data$Deaths[od_data$State == state]
  for(year in startYear:2019){
    #positions of this year's 12 months within the state's vector
    #(hoisted so the index arithmetic is written once instead of three times)
    month_idx <- (1:12) + 12*(year - startYear)

    #find the indices of the missing data -- gets the missing indices for that particular year
    indexMissing <- which(is.na(currentDeaths[month_idx]))

    #years with complete monthly data need no imputation
    #(the original had a redundant `else { next }` branch here)
    if(length(indexMissing) != 0){
      #if there are missing values, we find the number of accounted deaths
      currentDeathsTotal <- sum(currentDeaths[month_idx], na.rm = TRUE)
      #and calculate the deaths that are not accounted for using the yearly deaths for the state
      numNotAccounted <- od_year$Deaths[od_year$State == state & od_year$Year == year] - currentDeathsTotal

      #we then divide number of unaccounted deaths evenly by the number of months with missing values
      currentDeaths[month_idx][indexMissing] <- numNotAccounted/length(indexMissing)
    }
  }
  #store the imputed values
  od_data$imputed_vals[od_data$State == state]<-currentDeaths
}

#group into 6 month time periods now and compute the total number of deaths in each period
od_data_grouped_data <- od_data %>%
  mutate(Time_Period_Start = lubridate::floor_date(Date , "6 months" )) %>%
  group_by(State, Time_Period_Start) %>%
  summarise(sum_deaths = sum(imputed_vals, na.rm = TRUE))

#restrict the dataset to be between 2000 and 2019 (the filter keeps years 2000-2019)
od_data_grouped_data <- od_data_grouped_data[year(od_data_grouped_data$Time_Period_Start) > 1999 &
                     year(od_data_grouped_data$Time_Period_Start) < 2020,]

#create a new dataset for the analysis using columns from the main analysis data
#NOTE(review): this column-bind relies on od_data_grouped_data having exactly the
#same State x Time_Period_Start row order as main_analysis_data -- confirm
sensitivity_anlys_imputed_od_wo_interp_data <- main_analysis_data %>%
  ungroup() %>%
  mutate(imputed_deaths_no_interp = od_data_grouped_data$sum_deaths,
         num_alive_no_interp = population - imputed_deaths_no_interp) %>%
  dplyr::select(-c(imputed_deaths, num_alive)) #want to remove the outcome from main analysis to not get confused

#compute the number of states with intervention in each 6-month period
#NOTE(review): this creates `num_states_with_intervention`, but the model fit below
#uses `num_states_w_intervention` (a column carried over from main_analysis_data)
#-- confirm which column is intended
sensitivity_anlys_imputed_od_wo_interp_data <- sensitivity_anlys_imputed_od_wo_interp_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_with_intervention = sum(Intervention_Redefined))

#run the model for analysis: a binomial GAM where the aggregated outcome is
#cbind(successes, failures) = (rounded overdose deaths, rounded survivors),
#with state fixed effects, region-specific cubic-regression-spline time trends
#(s(..., bs = "cr", by = Region)), the policy covariates, the DIH-prosecution
#intervention indicator, and the count of states with the intervention.
#Alabama is the baseline state (first factor level).
sensitivity_anlys_imputed_od_wo_interp <- gam(cbind(round(imputed_deaths_no_interp),
                                                    round(num_alive_no_interp))~ State +
                                                s(Time_Period_ID, bs = "cr",
                                                  by = as.factor(Region)) +
                                                Naloxone_Pharmacy_Yes_Redefined +
                                                Naloxone_Pharmacy_No_Redefined +
                                                Medical_Marijuana_Redefined +
                                                Recreational_Marijuana_Redefined +
                                                GSL_Redefined +
                                                PDMP_Redefined +
                                                Medicaid_Expansion_Redefined +
                                                Intervention_Redefined + 
                                                num_states_w_intervention,
                                              data = sensitivity_anlys_imputed_od_wo_interp_data,
                                              family = "binomial")
#render the regression table (HTML) for the knitted report
stargazer(sensitivity_anlys_imputed_od_wo_interp, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.277***
(0.028)
StateArizona 0.315***
(0.014)
StateArkansas -0.390***
(0.020)
StateCalifornia -0.159***
(0.013)
StateColorado 0.097***
(0.016)
StateConnecticut 0.189***
(0.016)
StateDelaware 0.430***
(0.022)
StateFlorida 0.253***
(0.012)
StateGeorgia -0.071***
(0.013)
StateHawaii -0.226***
(0.026)
StateIdaho -0.136***
(0.024)
StateIllinois -0.016
(0.013)
StateIndiana 0.088***
(0.014)
StateIowa -0.740***
(0.021)
StateKansas -0.328***
(0.019)
StateKentucky 0.641***
(0.014)
StateLouisiana 0.294***
(0.014)
StateMaine 0.145***
(0.022)
StateMaryland -1.066***
(0.019)
StateMassachusetts 0.208***
(0.014)
StateMichigan -0.019
(0.014)
StateMinnesota -0.618***
(0.017)
StateMississippi -0.100***
(0.018)
StateMissouri 0.194***
(0.015)
StateMontana -0.360***
(0.029)
StateNebraska -0.883***
(0.029)
StateNevada 0.439***
(0.017)
StateNew Hampshire 0.258***
(0.020)
StateNew Jersey 0.107***
(0.013)
StateNew Mexico 0.631***
(0.017)
StateNew York -0.238***
(0.013)
StateNorth Carolina 0.177***
(0.013)
StateNorth Dakota -1.053***
(0.045)
StateOhio 0.455***
(0.012)
StateOklahoma 0.385***
(0.015)
StateOregon -0.194***
(0.018)
StatePennsylvania 0.436***
(0.012)
StateRhode Island 0.238***
(0.022)
StateSouth Carolina 0.221***
(0.015)
StateSouth Dakota -0.941***
(0.042)
StateTennessee 0.436***
(0.013)
StateTexas -0.204***
(0.012)
StateUtah 0.072***
(0.018)
StateVermont -0.167***
(0.031)
StateVirginia -0.113***
(0.014)
StateWashington 0.078***
(0.015)
StateWest Virginia 0.876***
(0.015)
StateWisconsin -0.037**
(0.015)
StateWyoming 0.024
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.024***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.009
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.037***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.019***
(0.006)
Medicaid_Expansion_Redefined 0.099***
(0.006)
Intervention_Redefined 0.062***
(0.005)
num_states_w_intervention 0.004**
(0.002)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.911***
(0.054)
Observations 2,000
Adjusted R2 0.911
Log Likelihood -16,741.690
UBRE 8.831
Note: *p<0.1; **p<0.05; ***p<0.01
# exp(coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"]) 

8.2 Compile Results

########## Sensitivity Analysis 3: Make Data Frame of Results and 95% CI #############
#store the coefficients into the table; coef() on a gam fit returns a named
#vector (parametric terms followed by smooth-basis coefficients), so the names
#become the data frame's row names
sensitivity_anlys_wo_interp_full_table<-data.frame(coef(sensitivity_anlys_imputed_od_wo_interp))
#check to see how the table looks
head(sensitivity_anlys_wo_interp_full_table)
##                 coef.sensitivity_anlys_imputed_od_wo_interp.
## (Intercept)                                       -9.9109753
## StateAlaska                                        0.2774932
## StateArizona                                       0.3148168
## StateArkansas                                     -0.3899413
## StateCalifornia                                   -0.1588927
## StateColorado                                      0.0967274
#rename the single (auto-generated) column to "Coefficient_Estimate"
colnames(sensitivity_anlys_wo_interp_full_table)<-c("Coefficient_Estimate")

#names of the policy covariates exactly as they appear in the fitted model;
#used below to separate the policy-effect rows from the state-effect and
#smoothed-time rows. All but the last share the "_Redefined" suffix.
covariates <- c(
  paste0(c("Naloxone_Pharmacy_Yes",
           "Naloxone_Pharmacy_No",
           "Medical_Marijuana",
           "Recreational_Marijuana",
           "GSL",
           "PDMP",
           "Medicaid_Expansion",
           "Intervention"),
         "_Redefined"),
  "num_states_w_intervention"
)

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates,
#and smoothed time effects; covariate rows keep their original names.
#(idiom fixes: seq_len() instead of the 1:length() anti-pattern -- which would
#iterate over c(1, 0) on an empty table -- hoist the repeated
#rownames(...)[i] lookups into a local, and use startsWith() for prefix tests)
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
for (i in seq_len(nrow(sensitivity_anlys_wo_interp_full_table))) {
  nm <- rownames(sensitivity_anlys_wo_interp_full_table)[i]

  #covariate rows are left untouched
  if (nm %in% covariates) next

  if (startsWith(nm, "State")) {
    #state effects look like "StateMassachusetts" or "StateGeorgia";
    #strip the leading "State" and keep just the state name
    nm <- substr(nm, start = 6, stop = nchar(nm))
  } else if (nm == "(Intercept)") {
    #the intercept is the baseline state (Alabama); make that explicit
    nm <- "Intercept/Alabama"
  } else if (startsWith(nm, smooth_prefix)) {
    #smoothed time effects look like
    #"s(Time_Period_ID):as.factor(Region)West.1"; drop the prefix and keep
    #only the region/basis label
    nm <- paste0("Smoothed Time for Region ",
                 substr(nm, start = nchar(smooth_prefix) + 1, stop = nchar(nm)))
  }

  rownames(sensitivity_anlys_wo_interp_full_table)[i] <- nm
}

#Wald 95% confidence intervals for the coefficients: estimate +/- 1.96 * SE.
#summary(...)$se is ordered the same way as coef() (parametric terms first,
#then smooth-basis coefficients), so elementwise arithmetic lines up.
sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate - 1.96*summary(sensitivity_anlys_imputed_od_wo_interp)$se
sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate + 1.96*summary(sensitivity_anlys_imputed_od_wo_interp)$se

#impute the estimates and confidence intervals in the odds ratio scale
#(exponentiating logit-scale coefficients and their CI endpoints)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_wo_interp_full_table$Standard_Error<-summary(sensitivity_anlys_imputed_od_wo_interp)$se
#note that there is no p-value for the smoothed time effects, so we put a NA for those rows
#NOTE(review): this pads p.pv (parametric-term p-values) with NAs, assuming the
#parametric coefficients occupy the first rows of the table -- that is mgcv's
#coef() ordering, but worth confirming if the formula changes
sensitivity_anlys_wo_interp_full_table$p_value<-c(summary(sensitivity_anlys_imputed_od_wo_interp)$p.pv,
                                                       rep(NA, length(coef(sensitivity_anlys_imputed_od_wo_interp)) -
                                                             length(summary(sensitivity_anlys_imputed_od_wo_interp)$p.pv)))

head(sensitivity_anlys_wo_interp_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama           -9.9109753            -10.01690973
## Alaska                       0.2774932              0.22340459
## Arizona                      0.3148168              0.28749354
## Arkansas                    -0.3899413             -0.42845827
## California                  -0.1588927             -0.18490125
## Colorado                     0.0967274              0.06529704
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.8050408 4.962701e-05  4.463868e-05
## Alaska                          0.3315819 1.319817e+00  1.250326e+00
## Arizona                         0.3421401 1.370008e+00  1.333082e+00
## Arkansas                       -0.3514244 6.770966e-01  6.515128e-01
## California                     -0.1328841 8.530879e-01  8.311864e-01
## Colorado                        0.1281578 1.101560e+00  1.067476e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.517278e-05     0.05404819  0.000000e+00
## Alaska             1.393170e+00     0.02759624  8.690456e-24
## Arizona            1.407958e+00     0.01394046 6.372831e-113
## Arkansas           7.036851e-01     0.01965150  1.271106e-87
## California         8.755665e-01     0.01326967  4.855355e-33
## Colorado           1.136732e+00     0.01603590  1.620132e-09
tail(sensitivity_anlys_wo_interp_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2351076              0.20582369
## Smoothed Time for Region West.5            0.1834126              0.12970208
## Smoothed Time for Region West.6            0.1610046              0.08631049
## Smoothed Time for Region West.7            0.2036279              0.11614240
## Smoothed Time for Region West.8            0.3082962              0.20154232
## Smoothed Time for Region West.9            0.4224613              0.33093139
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2643916   1.265045
## Smoothed Time for Region West.5               0.2371231   1.201310
## Smoothed Time for Region West.6               0.2356987   1.174690
## Smoothed Time for Region West.7               0.2911134   1.225842
## Smoothed Time for Region West.8               0.4150502   1.361104
## Smoothed Time for Region West.9               0.5139912   1.525712
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.228537      1.302638     0.01494079
## Smoothed Time for Region West.5      1.138489      1.267597     0.02740332
## Smoothed Time for Region West.6      1.090145      1.265793     0.03810924
## Smoothed Time for Region West.7      1.123156      1.337916     0.04463545
## Smoothed Time for Region West.8      1.223288      1.514447     0.05446629
## Smoothed Time for Region West.9      1.392264      1.671951     0.04669894
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_wo_interp_full_table) %in% covariates)
#NOTE(review): only the covariate rows are rounded to 5 decimals here; the
#state/smooth rows appended below remain unrounded
sensitivity_anlys_wo_interp_covariate_table<-(round(sensitivity_anlys_wo_interp_full_table[covariate_Index,], 5))

#rename the variables so that it looks cleaner
#NOTE(review): this assumes covariate_Index returns the rows in the same order
#as the covariates vector (true here because which() preserves table order and
#the model terms were specified in that order) -- verify if the formula changes
rownames(sensitivity_anlys_wo_interp_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                         "Naloxone_Pharmacy_No",
                                                         "Medical_Marijuana",
                                                         "Recreational_Marijuana",
                                                         "GSL", 
                                                         "PDMP", 
                                                         "Medicaid_Expansion",
                                                         "Intervention_Redefined",
                                                         "Number of States with Intervention")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
sensitivity_anlys_wo_interp_covariate_table<-rbind(sensitivity_anlys_wo_interp_covariate_table, sensitivity_anlys_wo_interp_full_table[-covariate_Index,])
#remove the columns that aren't in odds ratio scale
sensitivity_anlys_wo_interp_covariate_table<-sensitivity_anlys_wo_interp_covariate_table[,-which(colnames(sensitivity_anlys_wo_interp_covariate_table) %in%
                                                                                                             c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]

#NOTE(review): the values labelled "Risk_Ratio" here are exponentiated logistic
#coefficients, i.e. odds ratios (interpretable as risk ratios only when the
#outcome is rare) -- the label is kept for downstream compatibility
colnames(sensitivity_anlys_wo_interp_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_wo_interp_covariate_table, 10)
##                                    Risk_Ratio_Estimates  RR_95_CI_LB
## Naloxone_Pharmacy_Yes                      9.763200e-01 9.615900e-01
## Naloxone_Pharmacy_No                       1.008580e+00 9.957900e-01
## Medical_Marijuana                          1.064830e+00 1.053090e+00
## Recreational_Marijuana                     9.636800e-01 9.475200e-01
## GSL                                        1.034510e+00 1.023170e+00
## PDMP                                       9.807700e-01 9.696700e-01
## Medicaid_Expansion                         1.103950e+00 1.090910e+00
## Intervention_Redefined                     1.063980e+00 1.052840e+00
## Number of States with Intervention         1.003700e+00 1.000080e+00
## Intercept/Alabama                          4.962701e-05 4.463868e-05
##                                     RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes              9.912800e-01 0.00200
## Naloxone_Pharmacy_No               1.021520e+00 0.18940
## Medical_Marijuana                  1.076690e+00 0.00000
## Recreational_Marijuana             9.801100e-01 0.00002
## GSL                                1.045970e+00 0.00000
## PDMP                               9.919900e-01 0.00082
## Medicaid_Expansion                 1.117130e+00 0.00000
## Intervention_Redefined             1.075240e+00 0.00000
## Number of States with Intervention 1.007330e+00 0.04506
## Intercept/Alabama                  5.517278e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_wo_interp_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_wo_interp.csv")

8.3 Attributable Deaths

################ Sensitivity Analysis 3: Number of Attributable Deaths #############
#first, we subset the data so that we only focus on the rows (state x period)
#for which the intervention indicator is positive
attr_deaths_anlys_wo_interp<-sensitivity_anlys_imputed_od_wo_interp_data[which(sensitivity_anlys_imputed_od_wo_interp_data$Intervention_Redefined>0),]

#compute the probability of overdose had intervention not occurred:
#take the observed logit(deaths/population) and remove the intervention's
#contribution (coefficient * exposure) on the logit scale, then expit back
prob_od_no_int_wo_interpolation<-expit(-coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                     + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(sensitivity_anlys_imputed_od_wo_interp) - 1.96*summary(sensitivity_anlys_imputed_od_wo_interp)$se
coef_ub<-coef(sensitivity_anlys_imputed_od_wo_interp) + 1.96*summary(sensitivity_anlys_imputed_od_wo_interp)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable.
#NOTE(review): the LB/UB suffixes refer to the bounds of the resulting
#attributable-death counts computed below, not of the probability itself
#(a smaller coefficient gives a larger counterfactual probability and hence a
#smaller attributable-death count)
prob_od_no_int_LB_wo_interp<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                        + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

prob_od_no_int_UB_wo_interp<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                        + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))

#estimate the number of deaths attributable to the intervention
#first, initialize the vectors to store the numbers
#FIX: size the vectors by the time periods actually present in the
#intervention subset (attr_deaths_anlys_wo_interp), which is what the loop
#below iterates over and what the data.frame construction afterwards assumes.
#Sizing by the full dataset's time periods could leave trailing NAs (and a
#length mismatch in data.frame()) if any period had no state with the
#intervention.
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_wo_interp$Time_Period_ID)))


#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths attributable to the intervention
#NOTE(review): the loop fills positions 1..(number of periods in the
#intervention subset); the preallocated vectors must have exactly that length
#for the data.frame construction below -- TODO confirm

index<-1 #keep track of where to store the values in the vector

for(time in sort(unique(attr_deaths_anlys_wo_interp$Time_Period_ID))){
  #find the indices of rows where the time point = time
  time_point_index<-which(attr_deaths_anlys_wo_interp$Time_Period_ID == time)

  #number of deaths attributable to intervention = observed number of deaths
  #with intervention - estimated number of deaths had intervention not occurred
  #(counterfactual probability * population), summed over states in the period
  num_attr_od[index]<-sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                          - prob_od_no_int_wo_interpolation[time_point_index]*attr_deaths_anlys_wo_interp$population[time_point_index])

  #find the lower and upper bounds of the estimated number of deaths attributable to the intervention
  num_attr_od_LB[index]<-sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                             - prob_od_no_int_LB_wo_interp[time_point_index]*attr_deaths_anlys_wo_interp$population[time_point_index])
  num_attr_od_UB[index]<-sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                             - prob_od_no_int_UB_wo_interp[time_point_index]*attr_deaths_anlys_wo_interp$population[time_point_index])


  index<-index + 1
}

#assemble one row per time period: ID, period start date, and the attributable
#death estimate with its CI bounds.
#NOTE(review): pairing sort(unique(ID)) with sort(unique(Time_Period_Start))
#assumes the sorted IDs and sorted start dates correspond one-to-one -- true
#when IDs are assigned chronologically, but worth confirming
num_attr_od_wo_interpolation<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_wo_interp$Time_Period_ID)),
                                       "Time_Start" = sort(unique(attr_deaths_anlys_wo_interp$Time_Period_Start)),
                                       "Num_Attr_Deaths" = num_attr_od,
                                       "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                       "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_wo_interpolation$Num_Attr_Deaths)
## [1] 33215.68
#sum up the number of excess deaths per year (two six-month periods per year),
#carrying the CI bounds through the same aggregation
yearly_num_Attr_Deaths_wo_interpolation<-num_attr_od_wo_interpolation %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

#distribution of yearly attributable deaths across years
summary(yearly_num_Attr_Deaths_wo_interpolation$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.91  755.56 1413.82 1660.78 2313.50 3696.16

9 Sensitivity Analysis 4: Effect of DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years

9.1 Analysis

########### Sensitivity Analysis 4: Two Year Intervention Effect ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")

#data cleaning: parse the charge date, normalize two known misspellings of
#state names ("pennsylvania" and "Virginia " with a trailing space), and drop
#rows with unparseable dates, "No Info" variants, or blank states
prosecution_data<-prosecution_data %>% 
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  filter(!is.na(Date), State.Filed != "No Info", State.Filed != "No info", State.Filed != "No Info ",
         State != "")

#manual correction: one record has an impossible charge year (2026); the linked
#article shows the true date was 2016-02-15
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#change the states into Character instead of factor
prosecution_data$State<-as.character(prosecution_data$State)
#see how many prosecution data points there are for each state
table(prosecution_data$State)
   Alabama         Alaska        Arizona       Arkansas     California 
        12              8              9              4             76 
  Colorado    Connecticut       Delaware        Florida        Georgia 
        32             47              3            138             29 
     Idaho       Illinois        Indiana           Iowa         Kansas 
         9            342             55             31              9 
  Kentucky      Louisiana          Maine       Maryland  Massachusetts 
        43             65             17             63             34 
  Michigan      Minnesota    Mississippi       Missouri        Montana 
       116            140              1             45             11 
  Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
         1             13             42            137              4 
  New York North Carolina   North Dakota           Ohio       Oklahoma 
       110            124             53            404             41 
    Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
        19            726              2             12             13 
 Tennessee          Texas           Utah        Vermont       Virginia 
        94             44             21             13             63 
Washington  West Virginia      Wisconsin        Wyoming 
        78             33            381             19 
#there are some repeated cases depending on victim: a single prosecution can
#appear once per victim, so de-duplicate on (State, Accused.Name, Date).
#NOTE(review): group_by(State) + distinct(Accused.Name, Date) de-duplicates
#within each state, which is equivalent to distinct(State, Accused.Name, Date);
#also, prefer TRUE over the reassignable shorthand T for .keep_all
prosecution_data_unique <- prosecution_data %>%
  group_by(State) %>%
  distinct(Accused.Name, Date, .keep_all = T)
table(prosecution_data_unique$State)
   Alabama         Alaska        Arizona       Arkansas     California 
        12              8              9              4             72 
  Colorado    Connecticut       Delaware        Florida        Georgia 
        30             46              3            134             26 
     Idaho       Illinois        Indiana           Iowa         Kansas 
         9            336             53             31              9 
  Kentucky      Louisiana          Maine       Maryland  Massachusetts 
        43             65             17             62             34 
  Michigan      Minnesota    Mississippi       Missouri        Montana 
       114            140              1             44             10 
  Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
         1             13             42            131              4 
  New York North Carolina   North Dakota           Ohio       Oklahoma 
       105            121             40            395             34 
    Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
        19            718              2             12             13 
 Tennessee          Texas           Utah        Vermont       Virginia 
        94             43             21             13             63 
Washington  West Virginia      Wisconsin        Wyoming 
        75             33            373             19 
#BUG FIX: Date was already parsed from Date.charged during cleaning above, and
#one typo'd date (2026-08-01) was manually corrected to 2016-02-15. The old
#line here re-parsed Date.charged with mdy(), which produced identical values
#for every row EXCEPT that it silently reinstated the bad 2026 date, undoing
#the correction. The redundant re-parse has therefore been removed.

#group the data into six month periods
prosecution_data_unique<-prosecution_data_unique %>% 
  mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))

# #######ONLY IF GROUPS######
# prosecution_grouped <- prosecution_data_unique %>% 
#   #filter to dates after 2000 and dates before 2020
#   filter(year(six_month_pd) >= 2000 & year(six_month_pd) <= 2019) %>%
#   group_by(State, six_month_pd) %>% 
#   #for each state, for each six month period, count the number of DIH prosecutions
#   summarise(num_dih = n()) %>% 
#   #label the groups according to zero, low, or high
#   mutate(group = ifelse(num_dih == 0, "zero", ifelse(num_dih >= 5, "high", "low"))) %>%
#   ungroup() %>%
#   #have to add in a row for hawaii because its not in the prosecution dataset
#   add_row(State = "Hawaii", six_month_pd = as.Date("2000-01-01"), num_dih = 0, group = "zero")
# 
# #we compute the final group for each state by seeing if it ever hits high or low
# prosecution_grouped_final <- prosecution_grouped %>%  
#   group_by(State) %>% 
#   summarise(final_gp = ifelse(sum(group == "high") > 0, "high", ifelse(sum(group == "low")> 0, "low", "zero"))) 
# 
# ggplot(prosecution_grouped_final, aes(final_gp)) + 
#   geom_bar() + 
#   labs(title = "Number of States by DIH prosecution Category, with Low = [1,5]") + 
#   geom_text(aes(label = ..count..), stat = "count", vjust = -.75)
# 
# #number of DIH prosecutions per six month for each state
# # pdf("Figures/num_dih_per_six_month_pd_by_state_11_12_21.pdf")
# ggplot(prosecution_grouped, aes(x = six_month_pd, y = num_dih)) + 
#   geom_bar(stat = "identity") + 
#   facet_wrap(~State) + 
#   theme(axis.text.x = element_text(angle = 30, size = 5))
# # dev.off()
# 
# # write.csv(prosecution_grouped, "./Data/num_dih_per_six_month_pd_by_state_11_12_21.csv")
# 
#for each state and six month period (2000-2019), record the first and last
#prosecution date within the period; periods with no prosecutions simply have
#no row here (they become NA after the merge below)
prosecution_data_by_six_month_pd <- prosecution_data_unique %>%
  filter(year(six_month_pd)>1999 & year(six_month_pd)<2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date))

#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by state and time period
#NOTE(review): all = TRUE is a full outer join -- any state/period present only
#in the prosecution data would produce a row with NA main-analysis columns;
#presumably the main data covers every state/period so no such rows arise --
#TODO confirm
sensitivity_anlys_redefine_int_data<-merge(main_analysis_data,
                                           prosecution_data_by_six_month_pd,
                                           by.x = c("State", "Time_Period_Start"),
                                           by.y = c("State", "six_month_pd"), all = TRUE)

#create an intervention 2-year-effect variable by initializing it to be all 0
#NOTE(review): group_by(State) is unnecessary for assigning a constant and
#leaves the data frame grouped by State afterwards; harmless for the row-index
#operations below, but worth being aware of
sensitivity_anlys_redefine_int_data<-sensitivity_anlys_redefine_int_data %>%
  group_by(State) %>%
  mutate(int_2_yr_effect = 0)

#change the period start/end columns into Date objects (merge() can leave them
#as character)
sensitivity_anlys_redefine_int_data$Time_Period_Start<-as.Date(sensitivity_anlys_redefine_int_data$Time_Period_Start)
sensitivity_anlys_redefine_int_data$Time_Period_End<-as.Date(sensitivity_anlys_redefine_int_data$Time_Period_End)

#we need to impute the newly defined intervention variable depending on the case
#by examining each row of the data set.
#NOTE(review): the logic below assumes each state's rows are ordered
#chronologically by Time_Period_Start (row i-1 is the previous six-month
#period) -- merge() sorts by the by-columns, which gives that ordering, but
#confirm if the merge call changes.
for(state in unique(sensitivity_anlys_redefine_int_data$State)){
  #first, subset the data set into state_data which only contains the data for the state
  state_index<-which(sensitivity_anlys_redefine_int_data$State == state)
  state_data<-sensitivity_anlys_redefine_int_data[state_index,]

  #note that the first four rows of the 2 year effect intervention variable are the same as the
  #first four rows of the original intervention variable (no i-4 lookback exists yet)
  state_data$int_2_yr_effect[1:4]<-state_data$Intervention_Redefined[1:4]

  for(i in 5:nrow(state_data)){
    #next, we deal with the rows where there was at least one prosecution in the last 3 six month periods:
    #any prosecution within the previous 18 months implies the full 2-year effect covers period i,
    #so these rows are imputed with a 1
    if((!is.na(state_data$first_date_in_pd[i - 1]) |
        !is.na(state_data$first_date_in_pd[i - 2]) |
        !is.na(state_data$first_date_in_pd[i - 3]))){

      state_data$int_2_yr_effect[i]<-1

    }else{
      #next, we account for the rows with the fractions:
      # 1) an intervention occurs in row i without an intervention 2 years ago
      # 2) row i contains the lasting effects of an intervention that occurred 2 years ago
      # 3) row i contains effects from both a new intervention starting in row i and lasting
      # effects from 2 years ago

      #To compute the fraction, we add the number of days that are affected by an intervention
      #(from both the current prosecution and previous prosecution) and then divide by the total
      #number of days in the period:

      total_len_of_pd<-as.numeric(state_data$Time_Period_End[i] - state_data$Time_Period_Start[i])

      #If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
      #date in period i. We subtract the last_date by the first date in the period, so we will get
      #a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
      #the last_date is the last date of prosecution from period i-4, plus 2 years.
      #NOTE(review): if last_date_in_pd[i-4] + 2 years falls before the start of
      #period i, this difference is negative and is never clamped to 0 below
      #(only values > 1 are clamped) -- verify this cannot occur given the
      #i-1..i-3 check above, or clamp at 0 as well
      len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
                                   (state_data$last_date_in_pd[i - 4] + years(2)) - state_data$Time_Period_Start[i],
                                   0)

      #If there is no prosecution in the period i, then the start_date is the last date in the period i.
      #We subtract start_date from the last date in period i, so we will get a 0 for the number
      #of days that are affected by a prosecution in period i. Otherwise, the start_date is the
      #first date of a prosecution in period i.
      len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
                                      as.numeric(state_data$Time_Period_End[i] - state_data$first_date_in_pd[i]),
                                      0)

      state_data$int_2_yr_effect[i]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
    }
  }

  #for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
  #previous intervention and the current intervention), we just impute a 1 instead
  state_data$int_2_yr_effect[state_data$int_2_yr_effect>1]<-1

  #lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
  sensitivity_anlys_redefine_int_data$int_2_yr_effect[state_index]<-state_data$int_2_yr_effect
}

#view the data set just to make sure the imputation looks right
# View(sensitivity_anlys_redefine_int_data %>% select(State, Time_Period_Start, Time_Period_End,
#                                                     Intervention_Redefined, first_date_in_pd,
#                                                     last_date_in_pd,
#                                                     int_2_yr_effect))


#for each six-month period, sum the (fractional) intervention exposure across
#all states; this acts as a national-level control for how widespread DIH
#prosecutions were at each time point. We ungroup() afterwards so that no
#grouping metadata leaks into the modeling step below (dplyr best practice).
sensitivity_anlys_redefine_int_data <- sensitivity_anlys_redefine_int_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention_2_yr_effect = sum(int_2_yr_effect)) %>%
  ungroup()

#run the analysis on the sensitivity analysis data
#binomial GAM: the cbind(deaths, survivors) response models the per-period
#probability of unintentional overdose death. State enters as a fixed effect
#(Alabama is the baseline), time enters as a separate cubic-regression-spline
#smooth per Census region, and the redefined policy indicators plus the two
#intervention-exposure variables enter as parametric terms.
sensitivity_anlys_redefine_int_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                            s(Time_Period_ID, bs = "cr", by = as.factor(Region))  +
                                            Naloxone_Pharmacy_Yes_Redefined +
                                            Naloxone_Pharmacy_No_Redefined +
                                            Medical_Marijuana_Redefined +
                                            Recreational_Marijuana_Redefined +
                                            GSL_Redefined +
                                            PDMP_Redefined +
                                            Medicaid_Expansion_Redefined +
                                            int_2_yr_effect +
                                            num_states_w_intervention_2_yr_effect,
                                          data = sensitivity_anlys_redefine_int_data, family = "binomial")

#render the coefficient table as HTML (output echoed below)
stargazer(sensitivity_anlys_redefine_int_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.264***
(0.028)
StateArizona 0.315***
(0.014)
StateArkansas -0.369***
(0.020)
StateCalifornia -0.157***
(0.013)
StateColorado 0.087***
(0.016)
StateConnecticut 0.179***
(0.016)
StateDelaware 0.420***
(0.022)
StateFlorida 0.248***
(0.012)
StateGeorgia -0.056***
(0.013)
StateHawaii -0.241***
(0.025)
StateIdaho -0.142***
(0.024)
StateIllinois -0.021
(0.013)
StateIndiana 0.082***
(0.014)
StateIowa -0.740***
(0.021)
StateKansas -0.310***
(0.019)
StateKentucky 0.634***
(0.014)
StateLouisiana 0.293***
(0.014)
StateMaine 0.142***
(0.022)
StateMaryland -1.067***
(0.019)
StateMassachusetts 0.208***
(0.014)
StateMichigan -0.028**
(0.014)
StateMinnesota -0.628***
(0.017)
StateMississippi -0.089***
(0.018)
StateMissouri 0.197***
(0.015)
StateMontana -0.354***
(0.029)
StateNebraska -0.863***
(0.029)
StateNevada 0.443***
(0.017)
StateNew Hampshire 0.250***
(0.020)
StateNew Jersey 0.110***
(0.013)
StateNew Mexico 0.641***
(0.017)
StateNew York -0.244***
(0.013)
StateNorth Carolina 0.177***
(0.013)
StateNorth Dakota -1.060***
(0.045)
StateOhio 0.450***
(0.012)
StateOklahoma 0.381***
(0.015)
StateOregon -0.202***
(0.018)
StatePennsylvania 0.430***
(0.012)
StateRhode Island 0.245***
(0.022)
StateSouth Carolina 0.216***
(0.015)
StateSouth Dakota -0.965***
(0.043)
StateTennessee 0.446***
(0.013)
StateTexas -0.197***
(0.012)
StateUtah 0.081***
(0.018)
StateVermont -0.162***
(0.031)
StateVirginia -0.114***
(0.014)
StateWashington 0.064***
(0.015)
StateWest Virginia 0.868***
(0.015)
StateWisconsin -0.040***
(0.015)
StateWyoming 0.026
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.034***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.003
(0.007)
Medical_Marijuana_Redefined 0.067***
(0.006)
Recreational_Marijuana_Redefined -0.039***
(0.009)
GSL_Redefined 0.036***
(0.006)
PDMP_Redefined -0.017***
(0.006)
Medicaid_Expansion_Redefined 0.103***
(0.006)
int_2_yr_effect 0.058***
(0.005)
num_states_w_intervention_2_yr_effect -0.002**
(0.001)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.738***
(0.028)
Observations 2,000
Adjusted R2 0.911
Log Likelihood -16,745.360
UBRE 8.835
Note: *p<0.1; **p<0.05; ***p<0.01

9.2 Plots

#default mgcv plot of the fitted smooth terms (one regional time smooth per panel)
plot(sensitivity_anlys_redefine_int_model)

9.3 Compile Results

############## Sensitivity Analysis 4: Make Data Frame of Results and 95% CI ##########
#store the coefficients into a data frame; row names carry the model term names
sensitivity_anlys_redefine_int_full_table<-data.frame(coef(sensitivity_anlys_redefine_int_model))
#check to see how the table looks (output echoed below)
head(sensitivity_anlys_redefine_int_full_table)
##                 coef.sensitivity_anlys_redefine_int_model.
## (Intercept)                                    -9.73777125
## StateAlaska                                     0.26352781
## StateArizona                                    0.31539440
## StateArkansas                                  -0.36941493
## StateCalifornia                                -0.15738403
## StateColorado                                   0.08700546
#rename the single auto-generated column to "Coefficient_Estimate"
colnames(sensitivity_anlys_redefine_int_full_table)<-c("Coefficient_Estimate")

#vector of covariate (policy) terms whose rows keep their original names
covariates<-c("Naloxone_Pharmacy_Yes_Redefined",
              "Naloxone_Pharmacy_No_Redefined",
              "Medical_Marijuana_Redefined",
              "Recreational_Marijuana_Redefined",
              "GSL_Redefined",
              "PDMP_Redefined",
              "Medicaid_Expansion_Redefined",
              "int_2_yr_effect",
              "num_states_w_intervention_2_yr_effect")

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates,
#and smoothed time effects.
#Work on a local copy of the row names and assign back once at the end,
#rather than repeatedly calling rownames() inside the loop; the smooth-term
#prefix length is derived from the prefix itself instead of magic numbers.
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
row_names<-rownames(sensitivity_anlys_redefine_int_full_table)

for(i in seq_along(row_names)){

  #covariate rows are left untouched
  if(!(row_names[i] %in% covariates)){

    if(startsWith(row_names[i], "State")){

      #state effects look like "StateMassachusetts" or "StateGeorgia";
      #strip the leading "State" and keep just the state name
      row_names[i]<-substr(row_names[i], start = 6, stop = nchar(row_names[i]))

    }else if(row_names[i] == "(Intercept)"){

      #the intercept corresponds to the baseline state (Alabama,
      #first alphabetically), so label it accordingly
      row_names[i]<-"Intercept/Alabama"

    }else if(startsWith(row_names[i], smooth_prefix)){

      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1";
      #replace the technical prefix with a readable label
      row_names[i]<-paste0("Smoothed Time for Region ",
                           substr(row_names[i], start = nchar(smooth_prefix) + 1,
                                  stop = nchar(row_names[i])))

    }
  }
}

rownames(sensitivity_anlys_redefine_int_full_table)<-row_names

#confidence intervals for the coefficients
#summary.gam() is expensive to compute, so call it once and reuse the
#standard errors instead of recomputing it for every column below
model_summary<-summary(sensitivity_anlys_redefine_int_model)
coef_se<-model_summary$se

#Wald 95% CIs on the link (log-odds) scale
sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate - 1.96*coef_se
sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate + 1.96*coef_se

#estimates and confidence intervals on the odds-ratio scale (exponentiated)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_redefine_int_full_table$Standard_Error<-coef_se
#p-values (p.pv) exist only for the parametric terms (intercept, states,
#covariates); the smooth-term coefficients get NA so column lengths line up
sensitivity_anlys_redefine_int_full_table$p_value<-c(model_summary$p.pv,
                                                     rep(NA, length(coef(sensitivity_anlys_redefine_int_model)) -
                                                           length(model_summary$p.pv)))

#sanity-check the top of the assembled results table (output echoed below)
head(sensitivity_anlys_redefine_int_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.73777125             -9.79199494
## Alaska                      0.26352781              0.20958992
## Arizona                     0.31539440              0.28809260
## Arkansas                   -0.36941493             -0.40821397
## California                 -0.15738403             -0.18334805
## Colorado                    0.08700546              0.05563784
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.6835476 5.901191e-05  5.589727e-05
## Alaska                          0.3174657 1.301513e+00  1.233172e+00
## Arizona                         0.3426962 1.370800e+00  1.333881e+00
## Arkansas                       -0.3306159 6.911386e-01  6.648366e-01
## California                     -0.1314200 8.543759e-01  8.324784e-01
## Colorado                        0.1183731 1.090903e+00  1.057215e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  0.0000623001     0.02766515  0.000000e+00
## Alaska             1.3736421285     0.02751933  1.007809e-21
## Arizona            1.4087407223     0.01392949 1.664931e-113
## Arkansas           0.7184810905     0.01979543  1.015868e-77
## California         0.8768494050     0.01324695  1.489753e-32
## Colorado           1.1256640001     0.01600389  5.433111e-08
#sanity-check the tail: smooth-term rows should show NA p-values (output below)
tail(sensitivity_anlys_redefine_int_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2581725               0.2371533
## Smoothed Time for Region West.5            0.2624918               0.2309442
## Smoothed Time for Region West.6            0.2674418               0.2293683
## Smoothed Time for Region West.7            0.3501250               0.3017143
## Smoothed Time for Region West.8            0.4826199               0.4246320
## Smoothed Time for Region West.9            0.5736609               0.5182325
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2791917   1.294562
## Smoothed Time for Region West.5               0.2940395   1.300166
## Smoothed Time for Region West.6               0.3055153   1.306618
## Smoothed Time for Region West.7               0.3985358   1.419245
## Smoothed Time for Region West.8               0.5406077   1.620314
## Smoothed Time for Region West.9               0.6290893   1.774752
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.267635      1.322061     0.01072407
## Smoothed Time for Region West.5      1.259789      1.341837     0.01609575
## Smoothed Time for Region West.6      1.257805      1.357324     0.01942526
## Smoothed Time for Region West.7      1.352175      1.489642     0.02469936
## Smoothed Time for Region West.8      1.529028      1.717050     0.02958563
## Smoothed Time for Region West.9      1.679057      1.875901     0.02827980
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_redefine_int_full_table) %in% covariates)
sensitivity_anlys_redefine_int_covariate_table<-(round(sensitivity_anlys_redefine_int_full_table[covariate_Index,], 5))

#rename the variables so that the table looks cleaner
#(this assumes the rows matched above appear in the same order as the
#covariates vector, which follows the model's term order)
rownames(sensitivity_anlys_redefine_int_covariate_table)<-c("Naloxone_Pharmacy_Yes", 
                                                            "Naloxone_Pharmacy_No",
                                                            "Medical_Marijuana",
                                                            "Recreational_Marijuana",
                                                            "GSL", 
                                                            "PDMP", 
                                                            "Medicaid_Expansion",
                                                            "Two Year Intervention Effect", 
                                                            "Number of States w Intervention")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
sensitivity_anlys_redefine_int_covariate_table<-rbind(sensitivity_anlys_redefine_int_covariate_table, sensitivity_anlys_redefine_int_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value. Selecting columns
#to keep (rather than dropping via -which(...)) avoids the footgun where an
#empty which() result would silently drop every column.
sensitivity_anlys_redefine_int_covariate_table<-sensitivity_anlys_redefine_int_covariate_table[, c("Odds_Ratio", "Odds_Ratio_LB", "Odds_Ratio_UB", "p_value")]

#NOTE(review): these are odds ratios from a logistic model, relabeled here as
#risk ratios; with rare outcomes the two are close, but confirm the labeling
colnames(sensitivity_anlys_redefine_int_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
#inspect the reorganized covariate table (output echoed below)
head(sensitivity_anlys_redefine_int_covariate_table, 10)
##                                 Risk_Ratio_Estimates  RR_95_CI_LB RR_95_CI_UB
## Naloxone_Pharmacy_Yes                   9.664500e-01 9.517900e-01 9.81330e-01
## Naloxone_Pharmacy_No                    1.002860e+00 9.901100e-01 1.01578e+00
## Medical_Marijuana                       1.069810e+00 1.058000e+00 1.08175e+00
## Recreational_Marijuana                  9.615700e-01 9.454300e-01 9.77970e-01
## GSL                                     1.036740e+00 1.025360e+00 1.04826e+00
## PDMP                                    9.833200e-01 9.721900e-01 9.94570e-01
## Medicaid_Expansion                      1.108180e+00 1.095140e+00 1.12138e+00
## Two Year Intervention Effect            1.059240e+00 1.049520e+00 1.06905e+00
## Number of States w Intervention         9.975500e-01 9.955300e-01 9.99570e-01
## Intercept/Alabama                       5.901191e-05 5.589727e-05 6.23001e-05
##                                 p-value
## Naloxone_Pharmacy_Yes           0.00001
## Naloxone_Pharmacy_No            0.66138
## Medical_Marijuana               0.00000
## Recreational_Marijuana          0.00001
## GSL                             0.00000
## PDMP                            0.00375
## Medicaid_Expansion              0.00000
## Two Year Intervention Effect    0.00000
## Number of States w Intervention 0.01738
## Intercept/Alabama               0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_redefine_int_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_redefine_int.csv")

9.4 Attributable Deaths

################ Sensitivity Analysis 4: Number of Attributable Deaths ################
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_redefine_int<-sensitivity_anlys_redefine_int_data[which(sensitivity_anlys_redefine_int_data$int_2_yr_effect>0),]

#compute the probability of overdose had intervention not occurred:
#take the observed log-odds of death and subtract the intervention coefficient
#times the observed (fractional) exposure, then map back via expit.
#NOTE(review): this uses the observed death rate as a stand-in for the fitted
#probability under intervention — an approximation worth confirming.
prob_od_no_int_redefine_int<-expit(-coef(sensitivity_anlys_redefine_int_model)["int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                   + logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(sensitivity_anlys_redefine_int_model) - 1.96*summary(sensitivity_anlys_redefine_int_model)$se
coef_ub<-coef(sensitivity_anlys_redefine_int_model) + 1.96*summary(sensitivity_anlys_redefine_int_model)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
prob_od_no_int_LB_redefine_int<-expit(-coef_lb[names(coef_lb) == "int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                      + logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))

prob_od_no_int_UB_redefine_int<-expit(-coef_ub[names(coef_ub) == "int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
                                      + logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))

#estimate the number of deaths attributable to the intervention
#first, initialize the vectors with one slot per time period actually present
#in the exposure subset. The original sized these by the FULL data set's
#periods; if any period had no exposed state, the vectors would carry trailing
#NAs and the data.frame() construction below would fail on mismatched lengths.
time_periods<-sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID))
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(time_periods))


#for each time period, find the rows from that time point, then sum the
#deaths attributable to the intervention across the exposed states
for(index in seq_along(time_periods)){
  #find the indices of rows where the time point equals this period
  time_point_index<-which(attr_deaths_anlys_redefine_int$Time_Period_ID == time_periods[index])

  #attributable deaths = observed deaths with intervention
  #                      - estimated deaths had the intervention not occurred
  num_attr_od[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                          - prob_od_no_int_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])

  #lower and upper bounds from the CI of the intervention coefficient
  num_attr_od_LB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                             - prob_od_no_int_LB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
  num_attr_od_UB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
                             - prob_od_no_int_UB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
}

#assemble the per-period results.
#NOTE(review): "Time_Start" relies on sort(unique(Time_Period_Start)) aligning
#one-to-one with the sorted Time_Period_IDs — confirm the periods and their
#start dates are in bijection.
num_attr_od_redefine_int<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID)),
                                     "Time_Start" = sort(unique(attr_deaths_anlys_redefine_int$Time_Period_Start)),
                                     "Num_Attr_Deaths" = num_attr_od,
                                     "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                     "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_redefine_int$Num_Attr_Deaths)
## [1] 28144.81
#sum up the number of excess deaths per year (two six-month periods per year)
yearly_num_Attr_Deaths_redefine_int<-num_attr_od_redefine_int %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

#distribution of yearly attributable deaths (output echoed below)
summary(yearly_num_Attr_Deaths_redefine_int$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    27.8   494.0  1184.5  1407.2  1990.3  3358.7
#plot the yearly attributable deaths over time
ggplot(yearly_num_Attr_Deaths_redefine_int, aes(x = year, y = deaths)) + geom_line() + geom_point()

10 Sensitivity Analysis 5: Effect of Lagged DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years

10.1 Analysis

############# Sensitivity Analysis 5: Two Year Effect Lagged by 6 months ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")

#data cleaning: parse the charge date (m/d/Y), normalize misspelled or
#space-padded state names, and drop rows with unparseable dates or
#missing/unknown filing states
prosecution_data<-prosecution_data %>% 
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  filter(!is.na(Date), State.Filed != "No Info", State.Filed != "No info", State.Filed != "No Info ",
         State != "")

#manual correction based on the linked article: the charge date recorded as
#2026-08-01 is a typo for 2016-02-15
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")

#change the states into Character instead of factor
prosecution_data$State<-as.character(prosecution_data$State)
#see how many prosecution data points there are for each state (output below)
table(prosecution_data$State)
   Alabama         Alaska        Arizona       Arkansas     California 
        12              8              9              4             76 
  Colorado    Connecticut       Delaware        Florida        Georgia 
        32             47              3            138             29 
     Idaho       Illinois        Indiana           Iowa         Kansas 
         9            342             55             31              9 
  Kentucky      Louisiana          Maine       Maryland  Massachusetts 
        43             65             17             63             34 
  Michigan      Minnesota    Mississippi       Missouri        Montana 
       116            140              1             45             11 
  Nebraska         Nevada  New Hampshire     New Jersey     New Mexico 
         1             13             42            137              4 
  New York North Carolina   North Dakota           Ohio       Oklahoma 
       110            124             53            404             41 
    Oregon   Pennsylvania   Rhode Island South Carolina   South Dakota 
        19            726              2             12             13 
 Tennessee          Texas           Utah        Vermont       Virginia 
        94             44             21             13             63 
Washington  West Virginia      Wisconsin        Wyoming 
        78             33            381             19 
#NOTE(review): the code previously re-parsed Date here with mdy(Date.charged).
#Date was already parsed with as.Date() during cleaning, so the re-parse was
#redundant — and it reverted the manual 2026-08-01 -> 2016-02-15 date
#correction above, causing that prosecution to be silently dropped by the
#year filter below. The redundant re-parse is therefore removed.

#group the data into six month periods
prosecution_data<-prosecution_data %>% mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))

#count the number of prosecution media alerts in each six month period
#we also get the first and last date of prosecution in that time period,
#restricted to the study window (2000-2019)
prosecution_data_by_six_month_pd <- prosecution_data %>%
  filter(year(six_month_pd)>1999 & year(six_month_pd)<2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date))

#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by state and time period
#(all = TRUE keeps every state-period even when no prosecution occurred, in
#which case first_date_in_pd / last_date_in_pd are NA)
sensitivity_anlys_2yr_int_lag<-merge(main_analysis_data,
                                     prosecution_data_by_six_month_pd,
                                     by.x = c("State", "Time_Period_Start"),
                                     by.y = c("State", "six_month_pd"), all = TRUE)

#create a intervention 2 year effect variable by initializing it to be all 0
#NOTE(review): group_by(State) is unnecessary for a constant initialization and
#leaves the tibble grouped by State afterwards — harmless here, but worth noting
sensitivity_anlys_2yr_int_lag<-sensitivity_anlys_2yr_int_lag %>% 
  group_by(State) %>%
  mutate(int_2_yr_effect_lag = 0)

#change the dates into Date objects (merge() can leave them as character)
sensitivity_anlys_2yr_int_lag$Time_Period_Start<-as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_Start)
sensitivity_anlys_2yr_int_lag$Time_Period_End<-as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_End)

#we need to calculate the 2 year intervention variable depending on the case
#by examining each row of the data set.
#Same construction as the unlagged version earlier, except every effect
#computed from period i is written into period i + 1 (the 6-month lag).
#NOTE(review): assumes each state's rows are in chronological order after the
#merge — merge() sorts on the "by" columns (which include the period start),
#but confirm.
for(state in unique(sensitivity_anlys_2yr_int_lag$State)){
  #first, subset the data set into state_data which only contains the data for the state
  state_index<-which(sensitivity_anlys_2yr_int_lag$State == state)
  state_data<-sensitivity_anlys_2yr_int_lag[state_index,]

  #the loop stops one row early because each iteration writes into row i + 1
  for(i in 1:(nrow(state_data)-1)){
    #for the states that had at least one prosecution in the first 2 years of analysis period,
    #we impute the intervention variable based on the intervention variable of main analysis:
    #if intervention occurred in time i, then for the 6-month lagged effect, we compute the
    #proportion of days affected by intervention, taking into account the 6 month lag. Else,
    #if the intervention had occurred by time i, we impute a 1 in the next six-month interval
    #since lagged
    if(i %in% c(1:4)){
      if(state_data$Intervention_Redefined[i] > 0 & state_data$Intervention_Redefined[i] < 1){
        state_data$int_2_yr_effect_lag[i + 1] <- as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6)))/as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])
      }else if(state_data$Intervention_Redefined[i] == 1){
        state_data$int_2_yr_effect_lag[i + 1] <- 1
      }

      #next, if there was at least one prosecution in the last 3 six-month periods (i.e. 1.5 years) before time i
      #These rows will be imputed with a 1 in the next six-month interval since lagged
    }else if((!is.na(state_data$first_date_in_pd[i - 1]) |
              !is.na(state_data$first_date_in_pd[i - 2]) |
              !is.na(state_data$first_date_in_pd[i - 3]))){

      state_data$int_2_yr_effect_lag[i + 1]<-1

    }else{
      #next, we account for the rows with the fractions:
      # 1) an intervention occurs in row i without an intervention 2 years ago
      # 2) row i contains the lasting effects of an intervention that occurred 2 years ago
      # 3) row i contains effects from both a new intervention starting in row i and lasting
      # effects from 2 years ago

      #To compute the fraction, we add the number of days that are affected by an intervention
      #(from both the current prosecution and previous prosecution) and then divide by the total
      #number of days in the period:

      #first, we compute the number of days in the period of time interval i + 1
      total_len_of_pd<-as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])

      #If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
      #date in period i + 1. We subtract the last_date by the first date in period i + 1, so we will get
      #a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
      #the last_date is the last date of prosecution from period i-4, plus 2 years.
      #The start time is the first date of period i + 1
      #(date subtraction yields a difftime; as.numeric() converts it to days)
      len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
                                   as.numeric((as.Date(state_data$last_date_in_pd[i - 4] + years(2), format = "%Y-%m-%d") %m+% months(6)) - state_data$Time_Period_Start[i + 1]),
                                   0)

      #If there is no prosecution in the period i, then the start_date is the last date in period i + 1 (because lagged effect).
      #We subtract start_date from the last date in period i + 1, so we will get a 0 for the number
      #of days that are affected by a prosecution in period i. Otherwise, the start_date is the
      #first date of a prosecution in period i + 6 months. The end date is the last date in period i + 1.

      len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
                                      as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6))),
                                      0)

      state_data$int_2_yr_effect_lag[i + 1]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
    }
  }

  #for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
  #previous intervention and the current intervention), we just impute a 1 instead
  state_data$int_2_yr_effect_lag[state_data$int_2_yr_effect_lag>1]<-1

  #lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
  sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag[state_index]<-state_data$int_2_yr_effect_lag
}

#for each six-month period, sum the lagged (fractional) intervention exposure
#across all states, giving a national-level measure of how widespread the
#lagged prosecution effect is; ungroup() so no grouping metadata leaks into
#the modeling step below (dplyr best practice).
sensitivity_anlys_2yr_int_lag <- sensitivity_anlys_2yr_int_lag %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention_2yr_lag = sum(int_2_yr_effect_lag)) %>%
  ungroup()
#view the data set just to make sure the imputation looks right
# View(sensitivity_anlys_2yr_int_lag %>% select(State, Time_Period_Start, Time_Period_End,
#                                                     Intervention_Redefined, first_date_in_pd,
#                                                     last_date_in_pd,
#                                                     int_2_yr_effect_lag))


#run the analysis for all the states
#same binomial GAM specification as Sensitivity Analysis 4, but using the
#6-month-lagged intervention exposure and its national-level sum
lagged_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                             s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                             Naloxone_Pharmacy_Yes_Redefined +
                             Naloxone_Pharmacy_No_Redefined +
                             Medical_Marijuana_Redefined +
                             Recreational_Marijuana_Redefined +
                             GSL_Redefined +
                             PDMP_Redefined +
                             Medicaid_Expansion_Redefined +
                             int_2_yr_effect_lag + 
                             num_states_w_intervention_2yr_lag,
                           data = sensitivity_anlys_2yr_int_lag, family = "binomial")

#summary output of the model (HTML table echoed below)
stargazer(lagged_analysis_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
Dependent variable:
Unintentional Overdose Death
StateAlaska 0.259***
(0.028)
StateArizona 0.310***
(0.014)
StateArkansas -0.382***
(0.020)
StateCalifornia -0.153***
(0.013)
StateColorado 0.087***
(0.016)
StateConnecticut 0.183***
(0.016)
StateDelaware 0.414***
(0.022)
StateFlorida 0.256***
(0.012)
StateGeorgia -0.056***
(0.013)
StateHawaii -0.253***
(0.025)
StateIdaho -0.149***
(0.024)
StateIllinois -0.015
(0.013)
StateIndiana 0.083***
(0.014)
StateIowa -0.737***
(0.021)
StateKansas -0.315***
(0.019)
StateKentucky 0.633***
(0.014)
StateLouisiana 0.297***
(0.014)
StateMaine 0.143***
(0.022)
StateMaryland -1.062***
(0.019)
StateMassachusetts 0.209***
(0.014)
StateMichigan -0.022
(0.014)
StateMinnesota -0.623***
(0.017)
StateMississippi -0.102***
(0.018)
StateMissouri 0.199***
(0.015)
StateMontana -0.352***
(0.029)
StateNebraska -0.875***
(0.029)
StateNevada 0.444***
(0.017)
StateNew Hampshire 0.251***
(0.020)
StateNew Jersey 0.112***
(0.013)
StateNew Mexico 0.634***
(0.017)
StateNew York -0.242***
(0.013)
StateNorth Carolina 0.180***
(0.013)
StateNorth Dakota -1.061***
(0.045)
StateOhio 0.457***
(0.012)
StateOklahoma 0.379***
(0.015)
StateOregon -0.202***
(0.018)
StatePennsylvania 0.436***
(0.012)
StateRhode Island 0.237***
(0.022)
StateSouth Carolina 0.209***
(0.015)
StateSouth Dakota -0.973***
(0.043)
StateTennessee 0.442***
(0.013)
StateTexas -0.195***
(0.012)
StateUtah 0.080***
(0.018)
StateVermont -0.164***
(0.031)
StateVirginia -0.109***
(0.014)
StateWashington 0.069***
(0.015)
StateWest Virginia 0.866***
(0.015)
StateWisconsin -0.035**
(0.015)
StateWyoming 0.022
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.027***
(0.008)
Naloxone_Pharmacy_No_Redefined 0.005
(0.007)
Medical_Marijuana_Redefined 0.066***
(0.006)
Recreational_Marijuana_Redefined -0.039***
(0.009)
GSL_Redefined 0.034***
(0.006)
PDMP_Redefined -0.018***
(0.006)
Medicaid_Expansion_Redefined 0.102***
(0.006)
int_2_yr_effect_lag 0.036***
(0.005)
num_states_w_intervention_2yr_lag 0.0001
(0.001)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -9.787***
(0.030)
Observations 2,000
Adjusted R2 0.911
Log Likelihood -16,792.500
UBRE 8.882
Note: *p<0.1; **p<0.05; ***p<0.01

10.2 Plots

#model diagnostics: convergence information and basis-dimension (k) checks
#for each regional smooth term
gam.check(lagged_analysis_model)

## 
## Method: UBRE   Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-7.10661e-07,2.358126e-05]
## (score 8.881691 & scale 1).
## Hessian positive definite, eigenvalue range [8.779527e-05,0.0005068349].
## Model rank =  95 / 95 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                                                k'  edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest   9.00 8.88    1.05    1.00
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.95    1.05    0.99
## s(Time_Period_ID):as.factor(Region)South     9.00 8.92    1.05    0.99
## s(Time_Period_ID):as.factor(Region)West      9.00 7.88    1.05    0.99

10.3 Compile Results

############## Sensitivity Analysis 5: Make Data Frame of Results and 95% CI ###########
#store the coefficients into the table
#one row per model coefficient: state effects, covariates, and smoothed-time basis terms
sensitivity_anlys_2yr_int_lag_full_table<-data.frame(coef(lagged_analysis_model))
#check to see how the table looks
head(sensitivity_anlys_2yr_int_lag_full_table)
##                 coef.lagged_analysis_model.
## (Intercept)                     -9.78710373
## StateAlaska                      0.25891518
## StateArizona                     0.30958384
## StateArkansas                   -0.38223314
## StateCalifornia                 -0.15314559
## StateColorado                    0.08682166
#rename the column to "Coefficient_Estimate"
colnames(sensitivity_anlys_2yr_int_lag_full_table)<-c("Coefficient_Estimate")

#vector of covariates: the seven "_Redefined" co-occurring policy indicators
#plus the two lagged-intervention terms
covariates <- c(
  paste0(
    c("Naloxone_Pharmacy_Yes", "Naloxone_Pharmacy_No",
      "Medical_Marijuana", "Recreational_Marijuana",
      "GSL", "PDMP", "Medicaid_Expansion"),
    "_Redefined"
  ),
  "int_2_yr_effect_lag",
  "num_states_w_intervention_2yr_lag"
)

#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#instead of an index loop with repeated rownames() calls, rename the whole
#vector at once (same result, vectorized and easier to read)

#given raw mgcv coefficient names, return the cleaned display names:
# - names listed in `covariates` are left untouched
# - "StateXxx" fixed effects become just "Xxx"
# - "(Intercept)" becomes "Intercept/Alabama" (Alabama is the baseline state)
# - "s(Time_Period_ID):as.factor(Region)Xxx.k" becomes "Smoothed Time for Region Xxx.k"
#any other name is returned unchanged
rename_gam_coef_rows <- function(coef_names, covariates) {
  renamed <- coef_names
  keep <- coef_names %in% covariates

  #state fixed effects: drop the leading "State"
  is_state <- !keep & startsWith(coef_names, "State")
  renamed[is_state] <- substring(coef_names[is_state], 6)

  #the intercept corresponds to the baseline state (Alabama)
  is_intercept <- !keep & !is_state & coef_names == "(Intercept)"
  renamed[is_intercept] <- "Intercept/Alabama"

  #smoothed regional time effects: swap the s():as.factor() prefix for readable text
  smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
  is_smooth <- !keep & !is_state & !is_intercept &
    startsWith(coef_names, smooth_prefix)
  renamed[is_smooth] <- paste0("Smoothed Time for Region ",
                               substring(coef_names[is_smooth],
                                         nchar(smooth_prefix) + 1))

  renamed
}

rownames(sensitivity_anlys_2yr_int_lag_full_table) <-
  rename_gam_coef_rows(rownames(sensitivity_anlys_2yr_int_lag_full_table),
                       covariates)

#confidence intervals for the coefficients
#Wald-style 95% CIs on the logit scale: estimate +/- 1.96 * standard error
#NOTE(review): assumes summary(model)$se is ordered identically to coef(model) -- confirm in mgcv docs
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate - 1.96*summary(lagged_analysis_model)$se
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate + 1.96*summary(lagged_analysis_model)$se

#impute the estimates and confidence intervals in the odds ratio scale
#(exponentiating the logit-scale estimates and bounds yields odds ratios)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound)

#store the standard error and p-value
sensitivity_anlys_2yr_int_lag_full_table$Standard_Error<-summary(lagged_analysis_model)$se
#note that there is no p-value for the smoothed time effects, so we put a NA for those rows
#NOTE(review): p.pv contains p-values for the parametric terms only, assumed to occupy
#the first rows of the coefficient table -- confirm against mgcv's summary.gam
sensitivity_anlys_2yr_int_lag_full_table$p_value<-c(summary(lagged_analysis_model)$p.pv, rep(NA, length(coef(lagged_analysis_model)) - length(summary(lagged_analysis_model)$p.pv)))

#sanity check: parametric rows at the top, smoothed-time rows (NA p-values) at the bottom
head(sensitivity_anlys_2yr_int_lag_full_table)
##                   Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama          -9.78710373             -9.84541050
## Alaska                      0.25891518              0.20497761
## Arizona                     0.30958384              0.28228183
## Arkansas                   -0.38223314             -0.42101146
## California                 -0.15314559             -0.17910494
## Colorado                    0.08682166              0.05546585
##                   Coefficient_Upper_Bound   Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama              -9.7287970 5.617135e-05  5.298983e-05
## Alaska                          0.3128528 1.295524e+00  1.227498e+00
## Arizona                         0.3368859 1.362858e+00  1.326152e+00
## Arkansas                       -0.3434548 6.823360e-01  6.563826e-01
## California                     -0.1271862 8.580048e-01  8.360182e-01
## Colorado                        0.1181775 1.090702e+00  1.057033e+00
##                   Odds_Ratio_UB Standard_Error       p_value
## Intercept/Alabama  5.954389e-05     0.02974835  0.000000e+00
## Alaska             1.367320e+00     0.02751917  5.030825e-21
## Arizona            1.400579e+00     0.01392960 1.973535e-109
## Arkansas           7.093155e-01     0.01978486  3.683783e-83
## California         8.805697e-01     0.01324457  6.352769e-31
## Colorado           1.125444e+00     0.01599786  5.728374e-08
tail(sensitivity_anlys_2yr_int_lag_full_table)
##                                 Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4            0.2590000               0.2386586
## Smoothed Time for Region West.5            0.2418414               0.2090413
## Smoothed Time for Region West.6            0.2408127               0.1988066
## Smoothed Time for Region West.7            0.3008425               0.2471347
## Smoothed Time for Region West.8            0.4279615               0.3621048
## Smoothed Time for Region West.9            0.5172422               0.4535700
##                                 Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4               0.2793414   1.295634
## Smoothed Time for Region West.5               0.2746415   1.273592
## Smoothed Time for Region West.6               0.2828187   1.272283
## Smoothed Time for Region West.7               0.3545502   1.350997
## Smoothed Time for Region West.8               0.4938181   1.534127
## Smoothed Time for Region West.9               0.5809143   1.677395
##                                 Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4      1.269545      1.322259     0.01037827
## Smoothed Time for Region West.5      1.232496      1.316059     0.01673476
## Smoothed Time for Region West.6      1.219946      1.326865     0.02143167
## Smoothed Time for Region West.7      1.280352      1.425539     0.02740191
## Smoothed Time for Region West.8      1.436349      1.638560     0.03360033
## Smoothed Time for Region West.9      1.573921      1.787672     0.03248578
##                                 p_value
## Smoothed Time for Region West.4      NA
## Smoothed Time for Region West.5      NA
## Smoothed Time for Region West.6      NA
## Smoothed Time for Region West.7      NA
## Smoothed Time for Region West.8      NA
## Smoothed Time for Region West.9      NA
#save the table into a CSV
# write.csv(round(sensitivity_anlys_2yr_int_lag_full_table,5), "./Data/coefficients_GAM_9_6_21_lagged_2yr_int.csv")

#export a table with just the covariates
#first, find the rows that contains the covariates
#(which() returns indices in table-row order, i.e. the model's coefficient order)
covariate_Index<-which(rownames(sensitivity_anlys_2yr_int_lag_full_table) %in% covariates)
sens_analysis_2yr_int_lag_covariate_table<-(round(sensitivity_anlys_2yr_int_lag_full_table[covariate_Index,], 5))

#rename the variables so that it looks cleaner
#NOTE(review): this positional renaming assumes the covariate rows appear in the same
#order as the model formula (Naloxone_Yes first, ..., num_states last) -- verify
rownames(sens_analysis_2yr_int_lag_covariate_table)<-c("Naloxone_Pharmacy_Yes",
                                                       "Naloxone_Pharmacy_No",
                                                       "Medical_Marijuana",
                                                       "Recreational_Marijuana",
                                                       "GSL", 
                                                       "PDMP", 
                                                       "Medicaid_Expansion",
                                                       "Intervention", 
                                                       "Number of States w Intervention")

#now, reorganize the data so that the covariates are on top and the rest of the variables are below
sens_analysis_2yr_int_lag_covariate_table<-rbind(sens_analysis_2yr_int_lag_covariate_table, sensitivity_anlys_2yr_int_lag_full_table[-covariate_Index,])
#remove the columns that aren't in odds ratio scale
sens_analysis_2yr_int_lag_covariate_table<-sens_analysis_2yr_int_lag_covariate_table[,-which(colnames(sens_analysis_2yr_int_lag_covariate_table) %in%
                                                                                               c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]

#NOTE(review): these columns hold exponentiated logistic-regression coefficients,
#i.e. odds ratios, though the exported labels say "Risk_Ratio" -- confirm intent
colnames(sens_analysis_2yr_int_lag_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sens_analysis_2yr_int_lag_covariate_table, 10)
##                                 Risk_Ratio_Estimates  RR_95_CI_LB  RR_95_CI_UB
## Naloxone_Pharmacy_Yes                   9.729600e-01 9.582600e-01 9.878900e-01
## Naloxone_Pharmacy_No                    1.005420e+00 9.926400e-01 1.018360e+00
## Medical_Marijuana                       1.067910e+00 1.056120e+00 1.079830e+00
## Recreational_Marijuana                  9.615700e-01 9.454500e-01 9.779600e-01
## GSL                                     1.034410e+00 1.023050e+00 1.045880e+00
## PDMP                                    9.823800e-01 9.712700e-01 9.936200e-01
## Medicaid_Expansion                      1.107430e+00 1.094400e+00 1.120620e+00
## Intervention                            1.036340e+00 1.026950e+00 1.045800e+00
## Number of States w Intervention         1.000110e+00 9.978100e-01 1.002420e+00
## Intercept/Alabama                       5.617135e-05 5.298983e-05 5.954389e-05
##                                 p-value
## Naloxone_Pharmacy_Yes           0.00042
## Naloxone_Pharmacy_No            0.40772
## Medical_Marijuana               0.00000
## Recreational_Marijuana          0.00001
## GSL                             0.00000
## PDMP                            0.00219
## Medicaid_Expansion              0.00000
## Intervention                    0.00000
## Number of States w Intervention 0.92543
## Intercept/Alabama               0.00000
#save the table into a CSV
# write.csv(round(sens_analysis_2yr_int_lag_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_2_yr_int_lag.csv")

10.4 Attributable Deaths

################# Sensitivity Analysis 5: Number of Attributable Deaths #################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_2yr_int_lag<-sensitivity_anlys_2yr_int_lag[which(sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag>0),]

#compute the probability of overdose had intervention not occurred:
#remove the intervention coefficient from the logit of the observed death rate
#NOTE(review): this subtracts the full coefficient even though int_2_yr_effect_lag
#is fractional in (0,1] for partially exposed periods -- confirm this is intended
prob_od_no_int_2yr_int_lag<-expit(-coef(lagged_analysis_model)["int_2_yr_effect_lag"]
                                  + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))

#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(lagged_analysis_model) - 1.96*summary(lagged_analysis_model)$se
coef_ub<-coef(lagged_analysis_model) + 1.96*summary(lagged_analysis_model)$se

#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
prob_od_no_int_LB_2yr_int_lag<-expit(-coef_lb[names(coef_lb) == "int_2_yr_effect_lag"]
                                     + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))

prob_od_no_int_UB_2yr_int_lag<-expit(-coef_ub[names(coef_ub) == "int_2_yr_effect_lag"]
                                     + logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population))


#estimate the number of deaths attributable to the intervention
#for each time period, sum over states:
#  observed deaths - (probability of death had intervention not occurred) * population
#the manual index-tracking loop is replaced by a vapply over the sorted unique
#periods, which produces the same vectors in the same (sorted) order

#helper: per-period attributable-death totals
# deaths, population, prob_no_int, period_id: parallel vectors (one entry per state-period row)
# returns a numeric vector, one total per sorted unique period
sum_attr_deaths_by_period <- function(deaths, population, prob_no_int, period_id) {
  vapply(
    sort(unique(period_id)),
    function(pd) {
      idx <- period_id == pd
      sum(deaths[idx] - prob_no_int[idx] * population[idx])
    },
    numeric(1)
  )
}

#point estimate plus bounds using the 95% CI of the intervention coefficient
num_attr_od <- sum_attr_deaths_by_period(attr_deaths_anlys_2yr_int_lag$imputed_deaths,
                                         attr_deaths_anlys_2yr_int_lag$population,
                                         prob_od_no_int_2yr_int_lag,
                                         attr_deaths_anlys_2yr_int_lag$Time_Period_ID)
num_attr_od_LB <- sum_attr_deaths_by_period(attr_deaths_anlys_2yr_int_lag$imputed_deaths,
                                            attr_deaths_anlys_2yr_int_lag$population,
                                            prob_od_no_int_LB_2yr_int_lag,
                                            attr_deaths_anlys_2yr_int_lag$Time_Period_ID)
num_attr_od_UB <- sum_attr_deaths_by_period(attr_deaths_anlys_2yr_int_lag$imputed_deaths,
                                            attr_deaths_anlys_2yr_int_lag$population,
                                            prob_od_no_int_UB_2yr_int_lag,
                                            attr_deaths_anlys_2yr_int_lag$Time_Period_ID)

#assemble the per-period attributable-death estimates and CI bounds into a frame
#NOTE(review): pairing sort(unique(Time_Period_ID)) with sort(unique(Time_Period_Start))
#assumes period IDs increase with the start dates -- confirm
num_attr_od_2yr_int_lag<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID)),
                                    "Time_Start" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_Start)),
                                    "Num_Attr_Deaths" = num_attr_od,
                                    "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                    "Num_Attr_Deaths_UB" = num_attr_od_UB)

#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
## [1] 17799.01
summary(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.61  165.88  389.67  456.38  654.51 1106.00
num_attr_od_2yr_int_lag$Time_Start<-as.Date(num_attr_od_2yr_int_lag$Time_Start)

#compute the 95% CI for the total
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_LB)
## [1] 13323.73
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_UB)
## [1] 22233.78
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_2yr_int_lag<-num_attr_od_2yr_int_lag %>%
  group_by("year" = year(Time_Start)) %>%
  summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))

summary(yearly_num_Attr_Deaths_2yr_int_lag$deaths)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.61  305.94  746.89  889.95 1264.13 2107.53
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_lb)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    8.687  229.016  559.093  666.187  946.285 1577.626
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_ub)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    14.5   382.2   933.0  1111.7  1579.1  2632.6
#quick visual check of the yearly attributable-deaths series
ggplot(yearly_num_Attr_Deaths_2yr_int_lag, aes(x = year, y = deaths)) + geom_line() + geom_point()

10.5 Compile Attributable Deaths Results

########################## Compiled Attributable Deaths Plot###############################
#add color column to the datasets
#gp_type is a plotting key used for color/shape mapping:
#  a = main analysis, b = all overdose deaths,
#  c = excluding high-missingness states, d = redefined (2-year) intervention
yearly_num_Attr_Deaths_main_analysis <- yearly_num_Attr_Deaths_main_analysis %>% 
  mutate(gp_type = "a")
yearly_num_Attr_Deaths_exclude_states <- yearly_num_Attr_Deaths_exclude_states %>%
  mutate(gp_type = "c")
yearly_num_Attr_Deaths_od_all <- yearly_num_Attr_Deaths_od_all %>%
  mutate(gp_type = "b")
yearly_num_Attr_Deaths_redefine_int <- yearly_num_Attr_Deaths_redefine_int %>%
  mutate(gp_type = "d")
#open a PDF device; the plot overlays estimate (solid) and 95% CI (dashed) lines
#for all four analyses, distinguished by color and point shape (gp_type)
pdf("Figures/num_attr_deaths_yearly_for_all_anlys_9_6_21_all_od_shape_alpha.pdf")
ggplot(yearly_num_Attr_Deaths_main_analysis) +
  geom_line(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type,
                linetype = "Estimate")) +
  geom_point(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type, shape = gp_type))  +
  geom_line(yearly_num_Attr_Deaths_main_analysis,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  geom_line(yearly_num_Attr_Deaths_main_analysis,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color =gp_type,linetype = "95% Confidence Interval")) +
  # geom_point(yearly_num_Attr_Deaths_main_analysis,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "a")) +
  # geom_point(yearly_num_Attr_Deaths_main_analysis,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color ="a")) +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                          color = gp_type, linetype = "Estimate")) +
  geom_point(yearly_num_Attr_Deaths_redefine_int,
             mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                           color = gp_type, shape = gp_type))  +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  geom_line(yearly_num_Attr_Deaths_redefine_int,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval")) +
  # geom_point(yearly_num_Attr_Deaths_redefine_int,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "d")) +
  # geom_point(yearly_num_Attr_Deaths_redefine_int,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color = "d")) +
  geom_line(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                         color = gp_type, linetype = "Estimate"), alpha = 1) +
  geom_point(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                          color = gp_type, shape = gp_type), alpha = 1)  +
  geom_line(yearly_num_Attr_Deaths_od_all,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
  geom_line(yearly_num_Attr_Deaths_od_all,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"),  alpha = 1) +
  # geom_point(yearly_num_Attr_Deaths_od_all,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
  #                         color = "b"), alpha = 0.5) +
  # geom_point(yearly_num_Attr_Deaths_od_all,
  #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
  #                         color = "b") , alpha = 0.5) +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                          color = gp_type, linetype = "Estimate"), alpha = 1) +
  geom_point(yearly_num_Attr_Deaths_exclude_states, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
                                                                  color = gp_type, shape = gp_type), alpha = 1)  +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"),alpha = 1) +
  geom_line(yearly_num_Attr_Deaths_exclude_states,
            mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
                          color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
    # geom_point(yearly_num_Attr_Deaths_exclude_states,
    #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
    #                         color = "c"),alpha = 0.5) +
    # geom_point(yearly_num_Attr_Deaths_exclude_states,
    #           mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
    #                         color = "c"), alpha = 0.5) +
  #styling: Times fonts, no panel grid, legend placed inside the plot area
 theme(axis.text.y=element_text(size=10, family = "Times"),
        axis.title=element_text(size=10,face="bold", family = "Times"),
        panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        axis.text.x = element_text(size = 10, family = "Times"),
        panel.background = element_rect("white"),
        legend.text=element_text(size=9, family = "Times"),
        legend.position = c(.4,.85),
       # legend.position = "bottom",
        legend.box="vertical", legend.margin=margin()) +
  guides(color=guide_legend(nrow=2,byrow=TRUE)) +
  labs(x = "Year", y = "Yearly Drug Overdose Deaths Attributable to DIH Prosecutions Reported in Media", color = "",
       linetype = "", shape = "") +
  #manual color/shape labels are matched positionally to the sorted gp_type levels a-d
  scale_color_manual(values=c('black', 'red', 'green', 'deepskyblue'),
                     labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
                                "Excluding States with At Least 75% Missing Monthly", "2 Year Effect")) +
  scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years")) +
  scale_linetype_manual(values = c("Estimate" = "solid", "95% Confidence Interval" = "dashed"),
                        breaks = c("Estimate", "95% Confidence Interval")) +
  guides(linetype = guide_legend(nrow = 1)) + 
  scale_shape_manual(values = c(16, 4, 5, 2), 
                     labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
                                "Excluding States with At Least 75% Missing Monthly", "2 Year Effect"))

#close the PDF device
dev.off()
## quartz_off_screen 
##                 2

11 Sensitivity Analysis 10: Including Unemployment Rates and Age Groups

11.1 Clean Data

#create age groups using the population dataset
#(Census single-year-of-age estimates, 2000-2010 vintage)
pop_by_age_2010<- read.csv("./Data/pop_by_age_state_2000_2010.csv")
head(pop_by_age_2010)
##   REGION DIVISION STATE          NAME SEX AGE ESTIMATESBASE2000 POPESTIMATE2000
## 1      0        0     0 United States   0   0           3805718         3855956
## 2      0        0     0 United States   0   1           3820647         3798691
## 3      0        0     0 United States   0   2           3790534         3800144
## 4      0        0     0 United States   0   3           3832855         3821118
## 5      0        0     0 United States   0   4           3926400         3902384
## 6      0        0     0 United States   0   5           3965175         3967834
##   POPESTIMATE2001 POPESTIMATE2002 POPESTIMATE2003 POPESTIMATE2004
## 1         4012658         3951461         3975871         4014258
## 2         3855407         4004674         3936139         3953063
## 3         3800096         3856114         4002836         3933735
## 4         3802710         3804336         3860727         4008220
## 5         3827346         3812607         3816873         3876609
## 6         3910033         3837187         3823568         3829607
##   POPESTIMATE2005 POPESTIMATE2006 POPESTIMATE2007 POPESTIMATE2008
## 1         4004393         4041738         4147997         4132735
## 2         3987032         3972124         4002215         4100756
## 3         3952632         3988119         3973479         4004146
## 4         3943215         3966022         4004011         3992320
## 5         4030128         3970880         3998260         4041170
## 6         3893128         4050582         3993489         4024297
##   POPESTIMATE2009 CENSUS2010POP POPESTIMATE2010
## 1         4003587       3944153         3952444
## 2         4078797       3978070         3951024
## 3         4103002       4096929         4087074
## 4         4025675       4119040         4133855
## 5         4033457       4063170         4076132
## 6         4070265       4056858         4069577
#base the age groups on abouk, powell, pacula
#age groups as coded below: 18-24, 25-34, 35-44, 45-64, 65+
#NOTE(review): the original comment said "0 - 17, 18 - 34, 35 - 54, 65+",
#which does not match the cut points in the code -- confirm the intended grouping
pop_by_age_2010_group <- pop_by_age_2010 %>% 
  #999 indicates total population
  filter(AGE < 999, AGE >= 18) %>%
  #create age groups
  mutate(age_groups = ifelse(AGE >= 18 & AGE <= 24, "18_24", 
                             ifelse(AGE >= 25 & AGE <= 34, "25_34",
                                    ifelse(AGE >= 35 & AGE <= 44, "35_44", 
                                           ifelse(AGE >= 45 & AGE <= 64, "45_64", "65_plus"))))) %>%
  #we filter according to how the DIH prosecutions were filtered, excluding cases with minors
  filter(NAME != "United States", SEX == 0) %>%
  group_by(NAME, age_groups) %>%
  summarise(pop_2000 = sum(ESTIMATESBASE2000),
            pop_2001 = sum(POPESTIMATE2001),
            pop_2002 = sum(POPESTIMATE2002),
            pop_2003 = sum(POPESTIMATE2003),
            pop_2004 = sum(POPESTIMATE2004),
            pop_2005 = sum(POPESTIMATE2005),
            pop_2006 = sum(POPESTIMATE2006),
            pop_2007 = sum(POPESTIMATE2007),
            pop_2008 = sum(POPESTIMATE2008),
            pop_2009 = sum(POPESTIMATE2009),
            pop_2010 = sum(CENSUS2010POP))

#also make distribution of each age group
#define proportion function
#calculate the proportion of the age group for each state, year
#(each element divided by the vector total, so the result sums to 1)
prop <- function(x) {
  x / sum(x)
}
#calculate the proportion of the age group for each state, year:
#within each state (NAME), divide every year's age-group population by the
#state total for that year.
#across(where(...)) is the supported replacement for the superseded mutate_if().
pop_by_age_2010_group <- pop_by_age_2010_group %>%
  group_by(NAME) %>%
  mutate(across(where(is.numeric), prop))

#census population estimates by single year of age, sex, and state, 2010-2019
pop_by_age_2019 <- read.csv("./Data/pop_by_age_state_2010_2019.csv")
#inspect the raw columns (output shown below)
head(pop_by_age_2019)
##   SUMLEV REGION DIVISION STATE          NAME SEX AGE ESTBASE2010_CIV
## 1     10      0        0     0 United States   0   0         3944160
## 2     10      0        0     0 United States   0   1         3978090
## 3     10      0        0     0 United States   0   2         4096939
## 4     10      0        0     0 United States   0   3         4119051
## 5     10      0        0     0 United States   0   4         4063186
## 6     10      0        0     0 United States   0   5         4056872
##   POPEST2010_CIV POPEST2011_CIV POPEST2012_CIV POPEST2013_CIV POPEST2014_CIV
## 1        3951430        3963092        3926570        3931258        3954787
## 2        3957730        3966225        3977549        3942698        3948891
## 3        4090621        3970654        3978925        3991740        3958711
## 4        4111688        4101644        3981531        3991017        4005928
## 5        4077346        4121488        4111490        3992502        4004032
## 6        4064521        4087054        4131049        4121876        4004576
##   POPEST2015_CIV POPEST2016_CIV POPEST2017_CIV POPEST2018_CIV POPEST2019_CIV
## 1        3983981        3954773        3893990        3815343        3783052
## 2        3973133        4002903        3972711        3908830        3829599
## 3        3966321        3991349        4020045        3987032        3922044
## 4        3974351        3982984        4006946        4033038        3998665
## 5        4020292        3989750        3997280        4018719        4043323
## 6        4017589        4035033        4003452        4008443        4028281
#build the 2011-2019 state x age-group populations, mirroring the 2000-2010 block
pop_by_age_2019_group <- pop_by_age_2019 %>% 
  #999 indicates total population; keep adults only (18+)
  filter(AGE < 999, AGE >= 18) %>%
  #same age bins as the 2000-2010 series; case_when replaces the nested
  #ifelse chain (AGE >= 18 is guaranteed by the filter above)
  mutate(age_groups = case_when(
    AGE <= 24 ~ "18_24",
    AGE <= 34 ~ "25_34",
    AGE <= 44 ~ "35_44",
    AGE <= 64 ~ "45_64",
    TRUE ~ "65_plus"
  )) %>%
  #drop the national total row; SEX == 0 is both sexes combined
  filter(NAME != "United States", SEX == 0) %>%
  group_by(NAME, age_groups) %>%
  #sum the single-year-of-age civilian population estimates per state x age group
  summarise(pop_2011 = sum(POPEST2011_CIV),
            pop_2012 = sum(POPEST2012_CIV),
            pop_2013 = sum(POPEST2013_CIV),
            pop_2014 = sum(POPEST2014_CIV),
            pop_2015 = sum(POPEST2015_CIV),
            pop_2016 = sum(POPEST2016_CIV),
            pop_2017 = sum(POPEST2017_CIV),
            pop_2018 = sum(POPEST2018_CIV),
            pop_2019 = sum(POPEST2019_CIV),
            #replicates summarise's default grouping and silences the message
            .groups = "drop_last")

#calculate the proportion of the age group for each year, state:
#within each state, each year column becomes that age group's share of the
#state total. across(where(...)) replaces the superseded mutate_if().
pop_by_age_2019_group <- pop_by_age_2019_group %>%
  group_by(NAME) %>%
  mutate(across(where(is.numeric), prop))

#join the two vintages of age-share data (2000-2010 and 2011-2019) so that
#each state x age-group row carries every year's population share
population_prop_data <- merge(pop_by_age_2010_group, pop_by_age_2019_group,
                              by = c("NAME", "age_groups"))

#reshape wide -> long: one row per state x age group x year
population_prop_data_long <- population_prop_data %>%
  pivot_longer(
    cols = starts_with("pop"),     #the pop_YYYY share columns
    names_to = "year",             #column names become the year value
    names_prefix = "pop_",         #strip the "pop_" prefix off the year
    values_to = "population_prop"  #cell values are the population shares
  )

#reshape long -> wide on age group: one row per state x year, with one
#pop_prop_* column for each age group
population_prop_data_final <- population_prop_data_long %>%
  pivot_wider(
    id_cols = c("NAME", "year"),
    names_from = age_groups,
    values_from = population_prop,
    names_prefix = "pop_prop_"
  )
#we manually retrieved the data from BLS to get seasonally adjusted
#unemployment rates for each state
#skip = 3 drops the first three rows because they are the spreadsheet header
unemployment_rate <- readxl::read_excel("./Data/bls_unemployment_rate_2000_2019.xlsx", skip = 3)
#the series ID corresponds to the different states in order, so have to rename
#the IDs -- checked this manually 11/29/21
#NOTE(review): this labeling relies on the spreadsheet rows being in exactly
#the same order (and count) as unique(main_analysis_data$State); confirm if
#either input file is regenerated.
unemployment_rate <- unemployment_rate %>%
  mutate(State = unique(main_analysis_data$State))

#change dataset from wide to long: one row per state x month
unemployment_rate_long <- unemployment_rate %>%
  pivot_longer(
    #pivot every numeric column (the monthly rate columns); where(is.numeric)
    #is the supported tidyselect form -- a bare predicate (cols = is.numeric)
    #is deprecated
    cols = where(is.numeric),
    #add column for the date
    names_to = "time",
    #add column where values are the unemployment rate
    values_to = "unemployment_rate"
  ) %>%
  #remove the columns which are not needed or are all NA
  #(all-NA month columns are not numeric, so they survived the pivot)
  dplyr::select(-`Series ID`,
         -`Nov\n2021` ,
         -`Dec\n2021`) %>%
  #change date to date object (column names look like "Jan\n2000")
  mutate(time = mdy(time)) %>%
  #filter to only keep dates up to 2019
  filter(year(time) <= 2019)

#since we need 6 month groupings, we take the average per 6 months and state;
#the rate is divided by 100 so it is a proportion rather than a percentage
unemployment_rate_summary <- unemployment_rate_long %>%
  #floor each date to the start of its half-year and group in one step
  group_by(State, six_month_pd = floor_date(time, unit = "6 months")) %>%
  #mean unemployment rate per state x six-month period, as a proportion
  summarise(mean_unemployment = mean(unemployment_rate/100))
#finally merge the two datasets into the main analysis dataset
#first create a temporary main analysis data with a year column, so it can be
#joined to the yearly age-share data
main_analysis_data_w_year <- main_analysis_data %>%
  mutate(year = year(Time_Period_Start))
#merge the age group proportion data on state + year
#NOTE(review): merge() is an inner join by default -- unmatched state/year
#rows are silently dropped; confirm the key coverage is complete
sensitivity_anlys_age_gp_unemp <- merge(main_analysis_data_w_year, population_prop_data_final,
                            by.x = c("State", "year"), by.y = c("NAME", "year"))

#merge the unemployment rate on state + six-month period start date
sensitivity_anlys_age_gp_unemp <- merge(sensitivity_anlys_age_gp_unemp, unemployment_rate_summary,
                            by.x = c("State", "Time_Period_Start"), by.y = c("State", "six_month_pd"))

#remove the helper year column (the original code also had a no-op
#mutate(mean_unemployment = mean_unemployment), which is removed here)
sensitivity_anlys_age_gp_unemp <- sensitivity_anlys_age_gp_unemp %>%
  dplyr::select(-year)

11.2 Quick EDA

#do an EDA to check drug overdoses by age group and unemployment rate
#overdose death rate vs each age group's population share, one panel per state
ggplot(sensitivity_anlys_age_gp_unemp) + 
  geom_line(aes(y = imputed_deaths/population, x = pop_prop_18_24, color = "18_24")) + 
  geom_line(aes(y = imputed_deaths/population, x = pop_prop_25_34, color = "25_34")) + 
  geom_line(aes(y = imputed_deaths/population, x = pop_prop_35_44, color = "35_44")) + 
  geom_line(aes(y = imputed_deaths/population, x = pop_prop_45_64, color = "45_64")) + 
  geom_line(aes(y = imputed_deaths/population, x = pop_prop_65_plus, color = "65+")) + 
  facet_wrap(~State)

#overdose death rate (per 100) and mean unemployment over time, per state
#NOTE(review): od_rate is scaled by 100 but mean_unemployment is a proportion,
#so the two series are on different scales within the same panel -- confirm
#this is intentional for the EDA
ggplot(sensitivity_anlys_age_gp_unemp) + 
  geom_line( aes(y = (imputed_deaths/population)*100, x = Time_Period_Start, color = "od_rate", group = 1)) + 
  geom_line( aes(y = mean_unemployment, x = Time_Period_Start, color = "unemp", group = 1)) + 
  facet_wrap(~State)

############################## Run Model with Spline Time Effects by Region ###############################
#model that we will be using for the main analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#run the analysis for all the states
#binomial GAM: the response cbind(deaths, survivors) gives per-state-period
#death counts out of the population at risk; counts are rounded because the
#imputed values need not be integers
#pop_prop_18_24 is omitted from the age-share covariates: the five shares sum
#to 1 by construction (see prop()), so one category must be excluded to avoid
#perfect collinearity
sensitivity_anlys_age_unemp_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
                                     s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
                                     Naloxone_Pharmacy_Yes_Redefined +
                                     Naloxone_Pharmacy_No_Redefined +
                                     Medical_Marijuana_Redefined +
                                     Recreational_Marijuana_Redefined +
                                     GSL_Redefined +
                                     PDMP_Redefined +
                                     Medicaid_Expansion_Redefined +
                                     Intervention_Redefined + 
                                     num_states_w_intervention + 
                                     mean_unemployment + 
                                     pop_prop_25_34 + 
                                     pop_prop_35_44 + 
                                     pop_prop_45_64 + 
                                     pop_prop_65_plus,
                                     data = sensitivity_anlys_age_gp_unemp,
                                   family = "binomial")

# summary(sensitivity_anlys_age_unemp_model)

#summary output of the model: stargazer renders the parametric coefficients as
#an HTML table; the smooth terms appear as row labels without coefficients
stargazer(sensitivity_anlys_age_unemp_model, type = "html", dep.var.labels = c("Unintentional Overdose Deaths"))
Dependent variable:
Unintentional Overdose Deaths
StateAlaska 0.670***
(0.040)
StateArizona 0.353***
(0.017)
StateArkansas -0.414***
(0.020)
StateCalifornia 0.125***
(0.021)
StateColorado 0.371***
(0.023)
StateConnecticut 0.149***
(0.018)
StateDelaware 0.345***
(0.023)
StateFlorida 0.035*
(0.021)
StateGeorgia 0.211***
(0.020)
StateHawaii -0.299***
(0.027)
StateIdaho -0.047*
(0.025)
StateIllinois 0.128***
(0.015)
StateIndiana 0.179***
(0.015)
StateIowa -0.803***
(0.023)
StateKansas -0.293***
(0.021)
StateKentucky 0.691***
(0.014)
StateLouisiana 0.406***
(0.017)
StateMaine -0.115***
(0.031)
StateMaryland -0.950***
(0.022)
StateMassachusetts 0.267***
(0.014)
StateMichigan -0.028*
(0.014)
StateMinnesota -0.540***
(0.019)
StateMississippi -0.019
(0.019)
StateMissouri 0.158***
(0.015)
StateMontana -0.524***
(0.031)
StateNebraska -0.852***
(0.030)
StateNevada 0.622***
(0.020)
StateNew Hampshire 0.159***
(0.024)
StateNew Jersey 0.159***
(0.018)
StateNew Mexico 0.635***
(0.017)
StateNew York -0.161***
(0.014)
StateNorth Carolina 0.264***
(0.013)
StateNorth Dakota -1.017***
(0.050)
StateOhio 0.428***
(0.013)
StateOklahoma 0.425***
(0.017)
StateOregon -0.178***
(0.018)
StatePennsylvania 0.322***
(0.015)
StateRhode Island 0.232***
(0.022)
StateSouth Carolina 0.208***
(0.015)
StateSouth Dakota -1.028***
(0.043)
StateTennessee 0.484***
(0.014)
StateTexas 0.146***
(0.024)
StateUtah 0.574***
(0.048)
StateVermont -0.316***
(0.033)
StateVirginia 0.017
(0.016)
StateWashington 0.233***
(0.018)
StateWest Virginia 0.687***
(0.021)
StateWisconsin -0.052***
(0.015)
StateWyoming 0.032
(0.034)
Naloxone_Pharmacy_Yes_Redefined -0.011
(0.008)
Naloxone_Pharmacy_No_Redefined 0.017**
(0.007)
Medical_Marijuana_Redefined 0.063***
(0.006)
Recreational_Marijuana_Redefined -0.053***
(0.009)
GSL_Redefined 0.013**
(0.006)
PDMP_Redefined -0.045***
(0.006)
Medicaid_Expansion_Redefined 0.088***
(0.006)
Intervention_Redefined 0.062***
(0.006)
num_states_w_intervention 0.005***
(0.002)
mean_unemployment -0.636***
(0.182)
pop_prop_25_34 0.698
(0.594)
pop_prop_35_44 -3.139***
(0.588)
pop_prop_45_64 3.153***
(0.694)
pop_prop_65_plus 5.840***
(0.634)
s(Time_Period_ID):as.factor(Region)Midwest.1
s(Time_Period_ID):as.factor(Region)Midwest.2
s(Time_Period_ID):as.factor(Region)Midwest.3
s(Time_Period_ID):as.factor(Region)Midwest.4
s(Time_Period_ID):as.factor(Region)Midwest.5
s(Time_Period_ID):as.factor(Region)Midwest.6
s(Time_Period_ID):as.factor(Region)Midwest.7
s(Time_Period_ID):as.factor(Region)Midwest.8
s(Time_Period_ID):as.factor(Region)Midwest.9
s(Time_Period_ID):as.factor(Region)Northeast.1
s(Time_Period_ID):as.factor(Region)Northeast.2
s(Time_Period_ID):as.factor(Region)Northeast.3
s(Time_Period_ID):as.factor(Region)Northeast.4
s(Time_Period_ID):as.factor(Region)Northeast.5
s(Time_Period_ID):as.factor(Region)Northeast.6
s(Time_Period_ID):as.factor(Region)Northeast.7
s(Time_Period_ID):as.factor(Region)Northeast.8
s(Time_Period_ID):as.factor(Region)Northeast.9
s(Time_Period_ID):as.factor(Region)South.1
s(Time_Period_ID):as.factor(Region)South.2
s(Time_Period_ID):as.factor(Region)South.3
s(Time_Period_ID):as.factor(Region)South.4
s(Time_Period_ID):as.factor(Region)South.5
s(Time_Period_ID):as.factor(Region)South.6
s(Time_Period_ID):as.factor(Region)South.7
s(Time_Period_ID):as.factor(Region)South.8
s(Time_Period_ID):as.factor(Region)South.9
s(Time_Period_ID):as.factor(Region)West.1
s(Time_Period_ID):as.factor(Region)West.2
s(Time_Period_ID):as.factor(Region)West.3
s(Time_Period_ID):as.factor(Region)West.4
s(Time_Period_ID):as.factor(Region)West.5
s(Time_Period_ID):as.factor(Region)West.6
s(Time_Period_ID):as.factor(Region)West.7
s(Time_Period_ID):as.factor(Region)West.8
s(Time_Period_ID):as.factor(Region)West.9
Constant -11.637***
(0.500)
Observations 2,000
Adjusted R2 0.913
Log Likelihood -16,521.120
UBRE 8.610
Note: *p<0.1; **p<0.05; ***p<0.01

11.3 Sandwich Estimator

#here, we estimate the variance-covariance matrix through the sandwich estimator
#we only want to estimate the variances for the policies, intervention variable,
#and number of states with intervention

#covariates entering the sandwich estimate -- kept in the same order as the
#tail of the model's parametric coefficient vector, since the order matters
#when the two are matched up below
sandwich_covariate_names <- c(
  "Naloxone_Pharmacy_Yes_Redefined",
  "Naloxone_Pharmacy_No_Redefined",
  "Medical_Marijuana_Redefined",
  "Recreational_Marijuana_Redefined",
  "GSL_Redefined",
  "PDMP_Redefined",
  "Medicaid_Expansion_Redefined",
  "Intervention_Redefined",
  "num_states_w_intervention",
  "mean_unemployment",
  "pop_prop_25_34",
  "pop_prop_35_44",
  "pop_prop_45_64",
  "pop_prop_65_plus"
)

sensitivity_anlys_age_unemp_subset_cov <- sensitivity_anlys_age_gp_unemp %>%
  ungroup() %>%
  dplyr::select(all_of(sandwich_covariate_names))

#estimate the 95% CI and SD
#the model's parametric coefficients end with exactly the subset covariates,
#so take the matching tail of the coefficient vector
n_subset_covariates <- ncol(sensitivity_anlys_age_unemp_subset_cov)
sensitivity_anlys_age_unemp_subset_coefficient_values <- tail(
  summary(sensitivity_anlys_age_unemp_model)$p.coeff,
  n = n_subset_covariates
)
#fitted death probabilities on the analysis data (response scale)
sensitivity_anlys_age_unemp_subset_pred_prob <- predict(
  sensitivity_anlys_age_unemp_model,
  newdata = sensitivity_anlys_age_gp_unemp,
  type = "response"
)
#sandwich-based SDs and 95% CIs for the selected coefficients
sensitivity_anlys_age_unemp_subset_sd_and_ci <- compute_sd_and_CI(
  sensitivity_anlys_age_unemp_subset_cov,
  sensitivity_anlys_age_gp_unemp$population,
  sensitivity_anlys_age_gp_unemp$imputed_deaths,
  sensitivity_anlys_age_unemp_subset_pred_prob,
  sensitivity_anlys_age_unemp_subset_coefficient_values
)
sensitivity_anlys_age_unemp_subset_sd_and_ci
##                                       lb_coef  coef_values      ub_coef
## Naloxone_Pharmacy_Yes_Redefined  -0.053662780 -0.011335338  0.030992104
## Naloxone_Pharmacy_No_Redefined   -0.011742544  0.016826931  0.045396406
## Medical_Marijuana_Redefined       0.041978748  0.063473833  0.084968919
## Recreational_Marijuana_Redefined -0.085362237 -0.053064003 -0.020765770
## GSL_Redefined                    -0.023273436  0.012994996  0.049263427
## PDMP_Redefined                   -0.072403764 -0.044885312 -0.017366860
## Medicaid_Expansion_Redefined      0.061030468  0.087510426  0.113990383
## Intervention_Redefined            0.035468127  0.061621342  0.087774556
## num_states_w_intervention         0.003369738  0.005478243  0.007586749
## mean_unemployment                -1.085829563 -0.636266543 -0.186703523
## pop_prop_25_34                   -0.352123019  0.697823342  1.747769703
## pop_prop_35_44                   -4.417798974 -3.139469107 -1.861139241
## pop_prop_45_64                    2.629349243  3.152988396  3.676627548
## pop_prop_65_plus                  5.368064774  5.840360001  6.312655229
##                                        exp_lb     exp_coef      exp_ub
## Naloxone_Pharmacy_Yes_Redefined    0.94775165   0.98872866   1.0314774
## Naloxone_Pharmacy_No_Redefined     0.98832613   1.01696930   1.0464426
## Medical_Marijuana_Redefined        1.04287231   1.06553160   1.0886832
## Recreational_Marijuana_Redefined   0.91817963   0.94831931   0.9794484
## GSL_Redefined                      0.97699530   1.01307980   1.0504970
## PDMP_Redefined                     0.93015526   0.95610713   0.9827831
## Medicaid_Expansion_Redefined       1.06293130   1.09145364   1.1207413
## Intervention_Redefined             1.03610462   1.06355954   1.0917420
## num_states_w_intervention          1.00337542   1.00549328   1.0076156
## mean_unemployment                  0.33762159   0.52926473   0.8296897
## pop_prop_25_34                     0.70319361   2.00937422   5.7417825
## pop_prop_35_44                     0.01206075   0.04330578   0.1554954
## pop_prop_45_64                    13.86474439  23.40590628  39.5129137
## pop_prop_65_plus                 214.44746152 343.90312395 551.5073847
##                                      sd_coef
## Naloxone_Pharmacy_Yes_Redefined  0.021595634
## Naloxone_Pharmacy_No_Redefined   0.014576263
## Medical_Marijuana_Redefined      0.010966880
## Recreational_Marijuana_Redefined 0.016478691
## GSL_Redefined                    0.018504302
## PDMP_Redefined                   0.014040026
## Medicaid_Expansion_Redefined     0.013510183
## Intervention_Redefined           0.013343477
## num_states_w_intervention        0.001075768
## mean_unemployment                0.229368888
## pop_prop_25_34                   0.535686919
## pop_prop_35_44                   0.652209116
## pop_prop_45_64                   0.267162833
## pop_prop_65_plus                 0.240966953